From c7437c1452a312a6a9344029b0164311861f327e Mon Sep 17 00:00:00 2001 From: Marco Braga Date: Thu, 29 May 2025 23:49:41 -0300 Subject: [PATCH 1/9] e2e/deps: enhance test scenarios with NLB --- tests/e2e/go.mod | 14 ++++++++++++++ tests/e2e/go.sum | 28 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/tests/e2e/go.mod b/tests/e2e/go.mod index 873f84da33..a5367e3589 100644 --- a/tests/e2e/go.mod +++ b/tests/e2e/go.mod @@ -3,6 +3,9 @@ module k8s.io/cloud-provider-aws/tests/e2e go 1.24.9 require ( + github.com/aws/aws-sdk-go-v2 v1.36.3 + github.com/aws/aws-sdk-go-v2/config v1.29.14 + github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.45.2 github.com/onsi/ginkgo/v2 v2.9.4 github.com/onsi/gomega v1.27.6 k8s.io/api v0.26.0 @@ -13,6 +16,17 @@ require ( ) require ( + github.com/aws/aws-sdk-go-v2/credentials v1.17.67 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect + github.com/aws/smithy-go v1.22.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v4 v4.1.3 // indirect diff --git a/tests/e2e/go.sum b/tests/e2e/go.sum index 663324c896..4e8455f1bf 100644 --- a/tests/e2e/go.sum +++ b/tests/e2e/go.sum @@ -42,6 +42,34 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= +github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= +github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= +github.com/aws/aws-sdk-go-v2/config v1.29.14 h1:f+eEi/2cKCg9pqKBoAIwRGzVb70MRKqWX4dg1BDcSJM= +github.com/aws/aws-sdk-go-v2/config v1.29.14/go.mod h1:wVPHWcIFv3WO89w0rE10gzf17ZYy+UVS1Geq8Iei34g= +github.com/aws/aws-sdk-go-v2/credentials v1.17.67 h1:9KxtdcIA/5xPNQyZRgUSpYOE6j9Bc4+D7nZua0KGYOM= +github.com/aws/aws-sdk-go-v2/credentials v1.17.67/go.mod h1:p3C44m+cfnbv763s52gCqrjaqyPikj9Sg47kUVaNZQQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 
h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo=
+github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo=
+github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.45.2 h1:vX70Z4lNSr7XsioU0uJq5yvxgI50sB66MvD+V/3buS4=
+github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2 v1.45.2/go.mod h1:xnCC3vFBfOKpU6PcsCKL2ktgBTZfOwTGxj6V8/X3IS4=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY=
+github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 h1:1Gw+9ajCV1jogloEv1RRnvfRFia2cL6c9cuKV2Ps+G8=
+github.com/aws/aws-sdk-go-v2/service/sso v1.25.3/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 h1:hXmVKytPfTy5axZ+fYbR5d0cFmC3JvwLm5kM83luako=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs=
+github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 h1:1XuUZ8mYJw9B6lzAkXhqHlJd/XvaX32evhproijJEZY=
+github.com/aws/aws-sdk-go-v2/service/sts v1.33.19/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4=
+github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
+github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
 github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=

From 0cde97ee0152ce6e2220732d3acbf321567817fd Mon Sep 17 00:00:00 2001
From: Marco Braga
Date: Thu, 29 May 2025 23:50:06 -0300
Subject: [PATCH 2/9] e2e: enhance test scenarios with NLB

This change enhances the test scenarios by:
- supporting more distributions which do not allow pods to bind on
  privileged ports (default behavior of libjig, see issue
- refactoring the tests to allow adding more cases
- introducing tests for NLB, including advanced tests to validate the
  node selector annotation. The AWS SDK is added to satisfy this
  validation.
---
 tests/e2e/loadbalancer.go | 373 ++++++++++++++++++++++++++++++++++----
 1 file changed, 337 insertions(+), 36 deletions(-)

diff --git a/tests/e2e/loadbalancer.go b/tests/e2e/loadbalancer.go
index 79b0e2625e..bd786b6aae 100644
--- a/tests/e2e/loadbalancer.go
+++ b/tests/e2e/loadbalancer.go
@@ -14,15 +14,44 @@ limitations under the License.
 package e2e
 
 import (
+	"context"
+	"fmt"
+	"strings"
+
 	. 
"github.com/onsi/ginkgo/v2" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" + imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + elbv2 "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2" +) + +const ( + annotationLBType = "service.beta.kubernetes.io/aws-load-balancer-type" + annotationLBTargetNodeLabels = "service.beta.kubernetes.io/aws-load-balancer-target-node-labels" +) + +var ( + // clusterNodeSelector is the discovered node(compute/worker) selector used in the cluster. + clusterNodesSelector string + clusterNodesCount int = 0 + + // lookupNodeSelectors are valid compute/node/worker selectors commonly used in different kubernetes + // distributions. + lookupNodeSelectors = []string{ + "node-role.kubernetes.io/worker", // used in must distributions + "node-role.kubernetes.io/node", // used in ccm-aws CI + } ) +// loadbalancer tests var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { f := framework.NewDefaultFramework("cloud-provider-aws") f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged @@ -41,61 +70,333 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { // After each test }) - It("should configure the loadbalancer based on annotations", func() { - loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs) - framework.Logf("Running tests against AWS with timeout %s", loadBalancerCreateTimeout) + type loadBalancerTestCases struct { + Name string + ResourceSuffix string + Annotations map[string]string + PostConfigService func(cfg *configServiceLB, svc *v1.Service) + PostRunValidation func(cfg *configServiceLB, svc *v1.Service) + } + cases := []loadBalancerTestCases{ + { + Name: "should configure the loadbalancer based on annotations", + ResourceSuffix: "", + Annotations: map[string]string{}, + }, + { + Name: "NLB should configure the loadbalancer based on annotations", + ResourceSuffix: "nlb", + Annotations: map[string]string{ + annotationLBType: "nlb", + }, + }, + { + Name: "NLB should configure the loadbalancer with target-node-labels", + ResourceSuffix: "sg-nd", + Annotations: map[string]string{ + annotationLBType: "nlb", + }, + PostConfigService: func(cfg *configServiceLB, svc *v1.Service) { + // discover clusterNodeSelector and patch service + // TODO: move to external function if there are more scenarios to discover nodes. 
+ By("discovering node label used in the kubernetes distributions") + for _, selector := range lookupNodeSelectors { + nodeList, err := cs.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + LabelSelector: selector, + }) + framework.ExpectNoError(err, "failed to list worker nodes") + if len(nodeList.Items) > 0 { + clusterNodesCount = len(nodeList.Items) + clusterNodesSelector = selector + break + } + } + + if clusterNodesCount == 0 { + framework.ExpectNoError(fmt.Errorf("unable to find node selector for %v", lookupNodeSelectors)) + } + + By(fmt.Sprintf("found %d nodes with selector %q\n", clusterNodesCount, clusterNodesSelector)) + if svc.Annotations == nil { + svc.Annotations = map[string]string{} + } + svc.Annotations[annotationLBTargetNodeLabels] = clusterNodesSelector + By(fmt.Sprintf("using service with annotations: %v", svc.Annotations)) + }, + PostRunValidation: func(cfg *configServiceLB, svc *v1.Service) { + // Validate in the TG if the node count matches with expected target-node-labels selector. + if len(svc.Status.LoadBalancer.Ingress) == 0 { + framework.Failf("No ingress found in LoadBalancer status for service %s/%s", svc.Namespace, svc.Name) + } + lbDNS := svc.Status.LoadBalancer.Ingress[0].Hostname + framework.ExpectNoError(getLBTargetCount(context.TODO(), lbDNS, clusterNodesCount), "AWS LB target count validation failed") + }, + }, + } + + serviceNameBase := "lbconfig-test" + for _, tc := range cases { + It(tc.Name, func() { + loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs) + framework.Logf("Running tests against AWS with timeout %s", loadBalancerCreateTimeout) + + // Create Configuration + serviceName := serviceNameBase + if len(tc.ResourceSuffix) > 0 { + serviceName = serviceName + "-" + tc.ResourceSuffix + } + framework.Logf("namespace for load balancer conig test: %s", ns.Name) + + By("creating a TCP service " + serviceName + " with type=LoadBalancerType in namespace " + ns.Name) + lbConfig := newConfigServiceLB() + lbConfig.LBJig = e2eservice.NewTestJig(cs, ns.Name, serviceName) + lbServiceConfig := lbConfig.buildService(tc.Annotations) + + // Hook: PostConfigService patchs service configuration. + if tc.PostConfigService != nil { + tc.PostConfigService(lbConfig, lbServiceConfig) + } + + // Create Load Balancer + By("creating loadbalancer for service " + lbServiceConfig.Namespace + "/" + lbServiceConfig.Name) + if _, err := lbConfig.LBJig.Client.CoreV1().Services(lbConfig.LBJig.Namespace).Create(context.TODO(), lbServiceConfig, metav1.CreateOptions{}); err != nil { + framework.ExpectNoError(fmt.Errorf("failed to create LoadBalancer Service %q: %v", lbServiceConfig.Name, err)) + } + + By("waiting for loadbalancer for service " + lbServiceConfig.Namespace + "/" + lbServiceConfig.Name) + lbService, err := lbConfig.LBJig.WaitForLoadBalancer(loadBalancerCreateTimeout) + framework.ExpectNoError(err) - serviceName := "lbconfig-test" - framework.Logf("namespace for load balancer conig test: %s", ns.Name) + // Run Workloads + By("creating a pod to be part of the TCP service " + serviceName) + _, err = lbConfig.LBJig.Run(lbConfig.buildReplicationController()) + framework.ExpectNoError(err) - By("creating a TCP service " + serviceName + " with type=LoadBalancerType in namespace " + ns.Name) - lbJig := e2eservice.NewTestJig(cs, ns.Name, serviceName) + // Hook: PostRunValidation performs LB validations after it is created (before test). 
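+			// For the target-node-labels case above, the hook calls getLBTargetCount to compare the
+			// targets registered in the AWS target group against the number of nodes matching the selector.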
+ if tc.PostRunValidation != nil { + By("running post run validations") + tc.PostRunValidation(lbConfig, lbService) + } + + // Test the Service Endpoint + By("hitting the TCP service's LB External IP") + if len(lbService.Spec.Ports) == 0 { + framework.Failf("No ports found in service spec for service %s/%s", lbService.Namespace, lbService.Name) + } + if len(lbService.Status.LoadBalancer.Ingress) == 0 { + framework.Failf("No ingress found in LoadBalancer status for service %s/%s", lbService.Namespace, lbService.Name) + } + svcPort := int(lbService.Spec.Ports[0].Port) + ingressIP := e2eservice.GetIngressPoint(&lbService.Status.LoadBalancer.Ingress[0]) + framework.Logf("Load balancer's ingress IP: %s", ingressIP) + + e2eservice.TestReachableHTTP(ingressIP, svcPort, e2eservice.LoadBalancerLagTimeoutAWS) - serviceUpdateFunc := func(svc *v1.Service) { - annotations := make(map[string]string) - annotations["aws-load-balancer-backend-protocol"] = "http" - annotations["aws-load-balancer-ssl-ports"] = "https" + // Update the service to cluster IP + By("changing TCP service back to type=ClusterIP") + _, err = lbConfig.LBJig.UpdateService(func(s *v1.Service) { + s.Spec.Type = v1.ServiceTypeClusterIP + }) + framework.ExpectNoError(err) - svc.Annotations = annotations - svc.Spec.Ports = []v1.ServicePort{ + // Wait for the load balancer to be destroyed asynchronously + _, err = lbConfig.LBJig.WaitForLoadBalancerDestroy(ingressIP, svcPort, loadBalancerCreateTimeout) + framework.ExpectNoError(err) + }) + } +}) + +// configServiceLB hold loadbalancer test configurations used by e2e lib (jig). +type configServiceLB struct { + PodPort uint16 + PodProtocol v1.Protocol + DefaultAnnotations map[string]string + + LBJig *e2eservice.TestJig +} + +func newConfigServiceLB() *configServiceLB { + return &configServiceLB{ + PodPort: 8080, + PodProtocol: v1.ProtocolTCP, + DefaultAnnotations: map[string]string{ + "aws-load-balancer-backend-protocol": "http", + "aws-load-balancer-ssl-ports": "https", + }, + } +} + +// buildService creates a service instance with custom annotations. +func (s *configServiceLB) buildService(extraAnnotations map[string]string) *v1.Service { + svc := &v1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: s.LBJig.Namespace, + Name: s.LBJig.Name, + Labels: s.LBJig.Labels, + Annotations: make(map[string]string, len(s.DefaultAnnotations)+len(extraAnnotations)), + }, + Spec: v1.ServiceSpec{ + Type: v1.ServiceTypeLoadBalancer, + SessionAffinity: v1.ServiceAffinityNone, + Selector: s.LBJig.Labels, + Ports: []v1.ServicePort{ { Name: "http", Protocol: v1.ProtocolTCP, Port: int32(80), - TargetPort: intstr.FromInt(80), + TargetPort: intstr.FromInt(int(s.PodPort)), }, { Name: "https", Protocol: v1.ProtocolTCP, Port: int32(443), - TargetPort: intstr.FromInt(80), + TargetPort: intstr.FromInt(int(s.PodPort)), }, - } + }, + }, + } + + // add default annotations - can be overriden by extra annotations + for aK, aV := range s.DefaultAnnotations { + svc.Annotations[aK] = aV + } + + // append test case annotations to the service + for aK, aV := range extraAnnotations { + svc.Annotations[aK] = aV + } + + // Defensive: ensure Annotations is not nil + if svc.Annotations == nil { + svc.Annotations = map[string]string{} + } + + return svc +} + +// buildReplicationController creates a replication controller wrapper for the test framework. +// buildReplicationController is basaed on newRCTemplate() from the test, which not provide +// customization to bind in non-privileged ports. 
+// TODO(mtulio): v1.33+[2] moved from RC to Deployments on tests, we must do the same to use Run() +// when the test framework is updated. +// [1] https://github.com/kubernetes/kubernetes/blob/89d95c9713a8fd189e8ad555120838b3c4f888d1/test/e2e/framework/service/jig.go#L636 +// [2] https://github.com/kubernetes/kubernetes/issues/119021 +func (s *configServiceLB) buildReplicationController() func(rc *v1.ReplicationController) { + return func(rc *v1.ReplicationController) { + var replicas int32 = 1 + var grace int64 = 3 // so we don't race with kube-proxy when scaling up/down + rc.ObjectMeta = metav1.ObjectMeta{ + Namespace: s.LBJig.Namespace, + Name: s.LBJig.Name, + Labels: s.LBJig.Labels, } + rc.Spec = v1.ReplicationControllerSpec{ + Replicas: &replicas, + Selector: s.LBJig.Labels, + Template: &v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: s.LBJig.Labels, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "netexec", + Image: imageutils.GetE2EImage(imageutils.Agnhost), + Args: []string{ + "netexec", + fmt.Sprintf("--http-port=%d", s.PodPort), + fmt.Sprintf("--udp-port=%d", s.PodPort), + }, + ReadinessProbe: &v1.Probe{ + PeriodSeconds: 3, + ProbeHandler: v1.ProbeHandler{ + HTTPGet: &v1.HTTPGetAction{ + Port: intstr.FromInt(int(s.PodPort)), + Path: "/hostName", + }, + }, + }, + }, + }, + TerminationGracePeriodSeconds: &grace, + }, + }, + } + } +} - lbService, err := lbJig.CreateLoadBalancerService(loadBalancerCreateTimeout, serviceUpdateFunc) - framework.ExpectNoError(err) +// getLBTargetCount verifies the number of registered targets for a given LBv2 DNS name matches the expected count. +// The steps includes: +// - Get Load Balancer ARN from DNS name extracted from service Status.LoadBalancer.Ingress[0].Hostname +// - List listeners for the load balancer +// - Get target groups attached to listeners +// - Count registered targets in target groups +// - Verify count matches number of worker nodes +func getLBTargetCount(ctx context.Context, lbDNSName string, expectedTargets int) error { + // Load AWS config + cfg, err := config.LoadDefaultConfig(ctx) + if err != nil { + return fmt.Errorf("unable to load AWS config: %v", err) + } + elbClient := elbv2.NewFromConfig(cfg) - By("creating a pod to be part of the TCP service " + serviceName) - _, err = lbJig.Run(nil) - framework.ExpectNoError(err) + // Get Load Balancer ARN from DNS name + describeLBs, err := elbClient.DescribeLoadBalancers(ctx, &elbv2.DescribeLoadBalancersInput{}) + if err != nil { + return fmt.Errorf("failed to describe load balancers: %v", err) + } + var lbARN string + for _, lb := range describeLBs.LoadBalancers { + if strings.EqualFold(aws.ToString(lb.DNSName), lbDNSName) { + lbARN = aws.ToString(lb.LoadBalancerArn) + break + } + } + if lbARN == "" { + return fmt.Errorf("could not find LB with DNS name: %s", lbDNSName) + } - By("hitting the TCP service's LB External IP") - svcPort := int(lbService.Spec.Ports[0].Port) - ingressIP := e2eservice.GetIngressPoint(&lbService.Status.LoadBalancer.Ingress[0]) - framework.Logf("Load balancer's ingress IP: %s", ingressIP) + // List listeners for the load balancer + listenersOut, err := elbClient.DescribeListeners(ctx, &elbv2.DescribeListenersInput{ + LoadBalancerArn: aws.String(lbARN), + }) + if err != nil { + return fmt.Errorf("failed to describe listeners: %v", err) + } - e2eservice.TestReachableHTTP(ingressIP, svcPort, e2eservice.LoadBalancerLagTimeoutAWS) + // Get target groups attached to listeners + targetGroupARNs := map[string]struct{}{} + for _, 
listener := range listenersOut.Listeners {
+		if len(targetGroupARNs) > 0 {
+			break
+		}
+		for _, action := range listener.DefaultActions {
+			if action.TargetGroupArn != nil {
+				targetGroupARNs[aws.ToString(action.TargetGroupArn)] = struct{}{}
+				break
+			}
+		}
+	}
+
+	if len(targetGroupARNs) == 0 {
+		return fmt.Errorf("no target groups found for LB: %s", lbARN)
+	}
 
-	// Update the service to cluster IP
-	By("changing TCP service back to type=ClusterIP")
-	_, err = lbJig.UpdateService(func(s *v1.Service) {
-		s.Spec.Type = v1.ServiceTypeClusterIP
+	// Count registered targets in target groups
+	totalTargets := 0
+	for tgARN := range targetGroupARNs {
+		tgHealth, err := elbClient.DescribeTargetHealth(ctx, &elbv2.DescribeTargetHealthInput{
+			TargetGroupArn: aws.String(tgARN),
 		})
-		framework.ExpectNoError(err)
+		if err != nil {
+			return fmt.Errorf("failed to describe target health for TG %s: %v", tgARN, err)
+		}
+		totalTargets += len(tgHealth.TargetHealthDescriptions)
+	}
 
-	// Wait for the load balancer to be destroyed asynchronously
-	_, err = lbJig.WaitForLoadBalancerDestroy(ingressIP, svcPort, loadBalancerCreateTimeout)
-	framework.ExpectNoError(err)
-	})
-})
+	// Verify count matches number of worker nodes
+	if totalTargets != expectedTargets {
+		return fmt.Errorf("target count mismatch: expected %d, got %d", expectedTargets, totalTargets)
+	}
+	return nil
+}

From 85eb52a57e3b23c96eed41edb17ddf57cff74c3d Mon Sep 17 00:00:00 2001
From: Marco Braga
Date: Sat, 14 Jun 2025 00:29:31 -0300
Subject: [PATCH 3/9] e2e/loadbalancer: implement hairpin connection cases

Implement the hairpin connection test cases, exposing an issue with
internal-scheme NLBs, which fail when the client tries to access a
LoadBalancer Service hosted on the same node.

The hairpin failure happens because the client IP preservation
attribute is set to true (the default), and the Service API does not
provide an interface to prevent the issue.

The e2e test is expected to pass (the failure is skipped) to prevent
permanent failures in CI; the problem is tracked in
https://github.com/kubernetes/cloud-provider-aws/issues/1160.
---
 tests/e2e/loadbalancer.go | 360 +++++++++++++++++++++++++++++++++-----
 1 file changed, 314 insertions(+), 46 deletions(-)

diff --git a/tests/e2e/loadbalancer.go b/tests/e2e/loadbalancer.go
index bd786b6aae..ec6ba18b59 100644
--- a/tests/e2e/loadbalancer.go
+++ b/tests/e2e/loadbalancer.go
@@ -16,12 +16,15 @@ package e2e
 import (
 	"context"
 	"fmt"
+	"sort"
 	"strings"
+	"time"
 
 	. "github.com/onsi/ginkgo/v2"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/wait"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2eservice "k8s.io/kubernetes/test/e2e/framework/service"
@@ -39,10 +42,6 @@ const (
 )
 
 var (
-	// clusterNodeSelector is the discovered node(compute/worker) selector used in the cluster.
-	clusterNodesSelector string
-	clusterNodesCount    int = 0
-
 	// lookupNodeSelectors are valid compute/node/worker selectors commonly used in different kubernetes
 	// distributions.
 	lookupNodeSelectors = []string{
@@ -51,6 +50,13 @@ var (
 	}
 )
 
+// ClusterNodeDiscovery holds information about discovered worker nodes. 
+type ClusterNodeDiscovery struct { + Selector string + Count int + SingleWorker string +} + // loadbalancer tests var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { f := framework.NewDefaultFramework("cloud-provider-aws") @@ -71,11 +77,14 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { }) type loadBalancerTestCases struct { - Name string - ResourceSuffix string - Annotations map[string]string - PostConfigService func(cfg *configServiceLB, svc *v1.Service) - PostRunValidation func(cfg *configServiceLB, svc *v1.Service) + Name string + ResourceSuffix string + Annotations map[string]string + PostConfigService func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) + PostRunValidation func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) + HookInClusterTestReachableHTTP bool + RequireAffinity bool + SkipTestFailure bool } cases := []loadBalancerTestCases{ { @@ -93,50 +102,68 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { { Name: "NLB should configure the loadbalancer with target-node-labels", ResourceSuffix: "sg-nd", - Annotations: map[string]string{ - annotationLBType: "nlb", - }, - PostConfigService: func(cfg *configServiceLB, svc *v1.Service) { - // discover clusterNodeSelector and patch service - // TODO: move to external function if there are more scenarios to discover nodes. - By("discovering node label used in the kubernetes distributions") - for _, selector := range lookupNodeSelectors { - nodeList, err := cs.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ - LabelSelector: selector, - }) - framework.ExpectNoError(err, "failed to list worker nodes") - if len(nodeList.Items) > 0 { - clusterNodesCount = len(nodeList.Items) - clusterNodesSelector = selector - break - } - } - - if clusterNodesCount == 0 { - framework.ExpectNoError(fmt.Errorf("unable to find node selector for %v", lookupNodeSelectors)) - } - - By(fmt.Sprintf("found %d nodes with selector %q\n", clusterNodesCount, clusterNodesSelector)) + Annotations: map[string]string{annotationLBType: "nlb"}, + PostConfigService: func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) { + By(fmt.Sprintf("found %d nodes with selector %q\n", nodeDiscovery.Count, nodeDiscovery.Selector)) if svc.Annotations == nil { svc.Annotations = map[string]string{} } - svc.Annotations[annotationLBTargetNodeLabels] = clusterNodesSelector + svc.Annotations[annotationLBTargetNodeLabels] = nodeDiscovery.Selector By(fmt.Sprintf("using service with annotations: %v", svc.Annotations)) }, - PostRunValidation: func(cfg *configServiceLB, svc *v1.Service) { - // Validate in the TG if the node count matches with expected target-node-labels selector. + PostRunValidation: func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) { if len(svc.Status.LoadBalancer.Ingress) == 0 { framework.Failf("No ingress found in LoadBalancer status for service %s/%s", svc.Namespace, svc.Name) } lbDNS := svc.Status.LoadBalancer.Ingress[0].Hostname - framework.ExpectNoError(getLBTargetCount(context.TODO(), lbDNS, clusterNodesCount), "AWS LB target count validation failed") + framework.ExpectNoError(getLBTargetCount(context.TODO(), lbDNS, nodeDiscovery.Count), "AWS LB target count validation failed") }, }, + // hairpin connection tests for internal CLB and NLB services LBs. 
+ { + Name: "internal should support hairpin connection", + ResourceSuffix: "hp-clb-int", + Annotations: map[string]string{ + "service.beta.kubernetes.io/aws-load-balancer-internal": "true", + }, + PostConfigService: func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) { + if svc.Annotations == nil { + svc.Annotations = map[string]string{} + } + svc.Annotations[annotationLBTargetNodeLabels] = fmt.Sprintf("kubernetes.io/hostname=%s", nodeDiscovery.SingleWorker) + framework.Logf("Using service annotations: %v", svc.Annotations) + }, + HookInClusterTestReachableHTTP: true, + RequireAffinity: true, + }, + // FIXME: https://github.com/kubernetes/cloud-provider-aws/issues/1160 + // Hairpin connection work with target type as instance only when preserve client IP is disabled. + // Currently CCM does not provide an interface to create a service with that setup, making an internal + // Service to fail. + { + Name: "NLB internal should support hairpin connection", + ResourceSuffix: "hp-nlb-int", + Annotations: map[string]string{ + annotationLBType: "nlb", + "service.beta.kubernetes.io/aws-load-balancer-internal": "true", + }, + PostConfigService: func(cfg *configServiceLB, svc *v1.Service, nodeDiscovery ClusterNodeDiscovery) { + if svc.Annotations == nil { + svc.Annotations = map[string]string{} + } + svc.Annotations[annotationLBTargetNodeLabels] = fmt.Sprintf("kubernetes.io/hostname=%s", nodeDiscovery.SingleWorker) + framework.Logf("Using service annotations: %v", svc.Annotations) + }, + HookInClusterTestReachableHTTP: true, + RequireAffinity: true, + SkipTestFailure: true, + }, } serviceNameBase := "lbconfig-test" for _, tc := range cases { It(tc.Name, func() { + nodeDiscovery := discoverClusterWorkerNode(cs) loadBalancerCreateTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(cs) framework.Logf("Running tests against AWS with timeout %s", loadBalancerCreateTimeout) @@ -150,11 +177,13 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { By("creating a TCP service " + serviceName + " with type=LoadBalancerType in namespace " + ns.Name) lbConfig := newConfigServiceLB() lbConfig.LBJig = e2eservice.NewTestJig(cs, ns.Name, serviceName) + + // Hook annotations to support dynamic config lbServiceConfig := lbConfig.buildService(tc.Annotations) // Hook: PostConfigService patchs service configuration. if tc.PostConfigService != nil { - tc.PostConfigService(lbConfig, lbServiceConfig) + tc.PostConfigService(lbConfig, lbServiceConfig, nodeDiscovery) } // Create Load Balancer @@ -169,13 +198,13 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { // Run Workloads By("creating a pod to be part of the TCP service " + serviceName) - _, err = lbConfig.LBJig.Run(lbConfig.buildReplicationController()) + _, err = lbConfig.LBJig.Run(lbConfig.buildReplicationController(tc.RequireAffinity, nodeDiscovery)) framework.ExpectNoError(err) // Hook: PostRunValidation performs LB validations after it is created (before test). 
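
(Aside on the FIXME above: the "preserve client IP" behaviour is an NLB target group attribute, preserve_client_ip.enabled, not a Service field, and the cloud provider does not currently expose it through an annotation. Purely to illustrate the knob involved, a minimal sketch against the AWS SDK v2 already used by this module — the target group ARN and the extra import of the elasticloadbalancingv2 types package are assumptions, and this is not part of the patch:)

    // Assumed extra import:
    // elbv2types "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancingv2/types"

    // disablePreserveClientIP turns off client IP preservation on a target group,
    // the setting that the FIXME identifies as breaking hairpin connections when
    // left enabled with instance targets.
    func disablePreserveClientIP(ctx context.Context, client *elbv2.Client, tgARN string) error {
    	_, err := client.ModifyTargetGroupAttributes(ctx, &elbv2.ModifyTargetGroupAttributesInput{
    		TargetGroupArn: aws.String(tgARN),
    		Attributes: []elbv2types.TargetGroupAttribute{
    			{Key: aws.String("preserve_client_ip.enabled"), Value: aws.String("false")},
    		},
    	})
    	return err
    }

(Until the provider grows such an interface, the internal NLB hairpin case above is skipped on failure rather than removed, per SkipTestFailure.)
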
if tc.PostRunValidation != nil { By("running post run validations") - tc.PostRunValidation(lbConfig, lbService) + tc.PostRunValidation(lbConfig, lbService, nodeDiscovery) } // Test the Service Endpoint @@ -190,7 +219,16 @@ var _ = Describe("[cloud-provider-aws-e2e] loadbalancer", func() { ingressIP := e2eservice.GetIngressPoint(&lbService.Status.LoadBalancer.Ingress[0]) framework.Logf("Load balancer's ingress IP: %s", ingressIP) - e2eservice.TestReachableHTTP(ingressIP, svcPort, e2eservice.LoadBalancerLagTimeoutAWS) + // Hook: HookInClusterTestReachableHTTP changes the default test function to run the client in the cluster. + if tc.HookInClusterTestReachableHTTP { + err := inClusterTestReachableHTTP(cs, ns.Name, nodeDiscovery.SingleWorker, ingressIP, svcPort) + if err != nil && tc.SkipTestFailure { + Skip(err.Error()) + } + framework.ExpectNoError(err) + } else { + e2eservice.TestReachableHTTP(ingressIP, svcPort, e2eservice.LoadBalancerLagTimeoutAWS) + } // Update the service to cluster IP By("changing TCP service back to type=ClusterIP") @@ -275,16 +313,17 @@ func (s *configServiceLB) buildService(extraAnnotations map[string]string) *v1.S } // buildReplicationController creates a replication controller wrapper for the test framework. -// buildReplicationController is basaed on newRCTemplate() from the test, which not provide +// buildReplicationController is based on newRCTemplate() from the e2e test framework, which not provide // customization to bind in non-privileged ports. -// TODO(mtulio): v1.33+[2] moved from RC to Deployments on tests, we must do the same to use Run() +// TODO(mtulio): v1.33+[2][3] moved from RC to Deployments on tests, we must do the same to use Run() // when the test framework is updated. // [1] https://github.com/kubernetes/kubernetes/blob/89d95c9713a8fd189e8ad555120838b3c4f888d1/test/e2e/framework/service/jig.go#L636 // [2] https://github.com/kubernetes/kubernetes/issues/119021 -func (s *configServiceLB) buildReplicationController() func(rc *v1.ReplicationController) { +// [3] https://github.com/kubernetes/cloud-provider-aws/blob/master/tests/e2e/go.mod#L14 +func (s *configServiceLB) buildReplicationController(affinity bool, nodeDiscovery ClusterNodeDiscovery) func(rc *v1.ReplicationController) { return func(rc *v1.ReplicationController) { var replicas int32 = 1 - var grace int64 = 3 // so we don't race with kube-proxy when scaling up/down + var grace int64 = 3 rc.ObjectMeta = metav1.ObjectMeta{ Namespace: s.LBJig.Namespace, Name: s.LBJig.Name, @@ -322,6 +361,25 @@ func (s *configServiceLB) buildReplicationController() func(rc *v1.ReplicationCo }, }, } + if affinity { + rc.Spec.Template.Spec.Affinity = &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/hostname", + Operator: v1.NodeSelectorOpIn, + Values: []string{nodeDiscovery.SingleWorker}, + }, + }, + }, + }, + }, + }, + } + } } } @@ -377,7 +435,6 @@ func getLBTargetCount(ctx context.Context, lbDNSName string, expectedTargets int } } } - if len(targetGroupARNs) == 0 { return fmt.Errorf("no target groups found for LB: %s", lbARN) } @@ -400,3 +457,214 @@ func getLBTargetCount(ctx context.Context, lbDNSName string, expectedTargets int } return nil } + +// discoverClusterWorkerNode identifies and selects worker nodes in the cluster based on predefined node label selectors. 
+// It returns a ClusterNodeDiscovery struct with the discovered information. +func discoverClusterWorkerNode(cs clientset.Interface) ClusterNodeDiscovery { + var workerNodeList []string + framework.Logf("discovering node label used in the kubernetes distributions") + for _, selector := range lookupNodeSelectors { + nodeList, err := cs.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ + LabelSelector: selector, + }) + framework.ExpectNoError(err, "failed to list worker nodes") + if len(nodeList.Items) > 0 { + for _, node := range nodeList.Items { + workerNodeList = append(workerNodeList, node.Name) + } + // Save the first worker node in the list to be used in cases. + sort.Strings(workerNodeList) + return ClusterNodeDiscovery{ + Selector: selector, + Count: len(nodeList.Items), + SingleWorker: workerNodeList[0], + } + } + } + framework.ExpectNoError(fmt.Errorf("unable to find node selector for %v", lookupNodeSelectors)) + return ClusterNodeDiscovery{} +} + +// inClusterTestReachableHTTP creates a pod within the cluster to test HTTP connectivity to a target IP and port. +// It schedules the pod on the specified node using node affinity to test the hairpin scenario. +// The pod uses a curl-based container to perform the HTTP request and validates the response. +// The function waits for the pod to complete its execution and inspects its exit code to determine success or failure. +// +// Parameters: +// - cs: Kubernetes clientset interface used to interact with the cluster. +// - namespace: The namespace in which the test pod will be created. +// - nodeName: The name of the node where the test pod should be scheduled. +// - target: The IP address or Hostname of the target HTTP server. +// - targetPort: The port number of the target HTTP server. +// +// Returns: +// - error: Returns an error if the pod creation, execution, or cleanup fails, or if the HTTP test fails unexpectedly. +// +// Behavior: +// - The function creates a pod with a curl-based container to perform the HTTP request. +// - It configures the pod to run as a non-root user with security settings. +// - The pod is scheduled on the specified node using node affinity. +// - Logs are periodically collected during the pod's execution for troubleshooting. +// - Events are inspected if the pod remains in a pending state for too long. +// - The function waits for the pod to complete and inspects its exit code to determine success or failure. +// - If the pod fails, an error is returned. +// - The pod is cleaned up after the test completes. +func inClusterTestReachableHTTP(cs clientset.Interface, namespace, nodeName, target string, targetPort int) error { + podName := "http-test-pod" + + // client http test (curl) pod spec. + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: namespace, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "curl", + Image: imageutils.GetE2EImage(imageutils.Agnhost), + Command: []string{"curl"}, + Args: []string{ + "--retry", "15", // Retry up to 15 times in case of transient network issues. + "--retry-delay", "20", // Wait 20 seconds between retries. + "--retry-max-time", "480", // Maximum time for retries is 480 seconds. + "--retry-all-errors", // Retry on all errors, ensuring robustness against temporary failures. + "--trace-time", // Include timestamps in trace output for debugging. + "-w", "\\\"\\n---> HTTPCode=%{http_code} Time=%{time_total}ms <---\\n\\\"", // Format output to include HTTP code and response time. 
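+						// Taken together, the settings above give curl a retry budget of roughly
+						// 8 minutes (15 retries, 20 seconds apart, capped by --retry-max-time 480),
+						// which stays comfortably inside the 15-minute pod wait used further below.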
+ fmt.Sprintf("http://%s:%d/echo?msg=hello", target, targetPort), + }, + }, + }, + SecurityContext: &v1.PodSecurityContext{ + RunAsNonRoot: aws.Bool(true), // Ensures the pod runs as a non-root user for enhanced security. + RunAsUser: aws.Int64(1000), // Specifies the user ID for the container process. + RunAsGroup: aws.Int64(1000), // Specifies the group ID for the container process. + SeccompProfile: &v1.SeccompProfile{ + Type: v1.SeccompProfileTypeRuntimeDefault, // Enforces runtime default seccomp profile for syscall filtering. + }, + }, + RestartPolicy: v1.RestartPolicyNever, // Prevents the pod from restarting automatically. + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: "kubernetes.io/hostname", + Operator: v1.NodeSelectorOpIn, + Values: []string{nodeName}, // Ensures the pod is scheduled on the specified node. + }, + }, + }, + }, + }, + }, + }, + }, + } + ct := pod.Spec.Containers[0] + framework.Logf("In-Cluster test PodSpec Image=%v Command=%v Args=%v", ct.Image, ct.Command, ct.Args) + + // Create the pod + _, err := cs.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create HTTP test pod: %v", err) + } + // Clean up the pod + defer func() { + err = cs.CoreV1().Pods(namespace).Delete(context.TODO(), podName, metav1.DeleteOptions{}) + if err != nil { + framework.Logf("Failed to delete pod %s: %v", podName, err) + } + }() + + // Pod logs wrapper. Collect recent logs, or all, from a test pod. + gatherLogs := func(tail int) string { + opts := &v1.PodLogOptions{} + if tail == 0 { + tail = 20 + } + opts.TailLines = aws.Int64(int64(tail)) + logs, errL := cs.CoreV1().Pods(namespace).GetLogs(podName, opts).DoRaw(context.TODO()) + if errL != nil { + framework.Logf("Failed to retrieve pod logs: %v", errL) + return "" + } + return string(logs) + } + + // Wait for the test pod to complete. Limit waiter be higher than curl retries. + waitCount := 0 + pendingCount := 0 + err = wait.PollImmediate(15*time.Second, 15*time.Minute, func() (bool, error) { + p, err := cs.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{}) + if err != nil { + framework.Logf("Error getting pod %s: %v", podName, err) + return false, err + } + framework.Logf("Pod %s status: Phase=%s", podName, p.Status.Phase) + podFinished := p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed + + // Troubleshoot pending pods + if p.Status.Phase == v1.PodPending { + pendingCount++ + } + if pendingCount%10 == 0 && pendingCount > 0 { + framework.Logf("Pod %s is pending for too long, checking events...", podName) + events, errE := cs.CoreV1().Events(namespace).List(context.TODO(), metav1.ListOptions{ + FieldSelector: fmt.Sprintf("involvedObject.name=%s", podName), + }) + if errE != nil { + framework.Logf("Failed to list events for pod %s: %v", podName, errE) + } else { + for _, event := range events.Items { + framework.Logf("Event: %s - %s", event.Reason, event.Message) + } + } + } + // frequently collect logs. 
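+		// With the 15-second poll interval above, that is one log tail roughly every minute
+		// (every 4th iteration) and one event dump roughly every 2.5 minutes of Pending state
+		// (every 10th pending check).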
+ if waitCount > 0 && waitCount%4 == 0 { + framework.Logf("Tail logs for HTTP test pod:\n%s", gatherLogs(5)) + } + if podFinished { + framework.Logf("Tail logs for HTTP test pod:\n%s", gatherLogs(0)) + } + waitCount++ + return podFinished, nil + }) + // Check overall error + if err != nil { + return fmt.Errorf("error waiting for pod %s to complete: %v", podName, err) + } + + // Inspect the pod's container status for exit code + pod, errS := cs.CoreV1().Pods(namespace).Get(context.TODO(), podName, metav1.GetOptions{}) + if errS != nil { + return fmt.Errorf("failed to get pod %s: %v", podName, errS) + } + if len(pod.Status.ContainerStatuses) == 0 { + return fmt.Errorf("no container statuses found for pod %s", podName) + } + containerStatus := pod.Status.ContainerStatuses[0] + + if containerStatus.State.Terminated != nil { + exitCode := containerStatus.State.Terminated.ExitCode + if exitCode != 0 { + errmsg := fmt.Errorf("pod %s exited with code %d", podName, exitCode) + framework.Logf("WARNING: %s.", errmsg.Error()) + return errmsg + } + } + + // Validate HTTP response format + // Expected format: HTTPCode=200 Time=