From c374806a2cd76c89128373a4b2f2c035ca71b13b Mon Sep 17 00:00:00 2001 From: Ben Luddy Date: Fri, 2 May 2025 17:05:18 -0400 Subject: [PATCH] Enable NLB connection draining for graceful apiserver shutdown. The kube-apiserver expects to terminate connections itself during graceful shutdown. As soon as kube-apiserver has received SIGTERM, its /readyz endpoint begins serving HTTP 500 responses. To allow time for load balancers to mark it unhealthy, it continues accepting new connections and serving requests on existing connections for a period of time (controlled by the --shutdown-delay-duration option). Once the shutdown delay has elapsed, it stops accepting new requests and drains in-flight requests before exiting. By default, NLBs immediately terminate established connections when a target becomes unhealthy. This causes client-facing disruption for clients connected via NLB to a kube-apiserver instance that is shutting down. To avoid this, NLB target groups are now configured with connection termination for unhealthy targets disabled and an unhealthy draining interval of 300 seconds, so that the kube-apiserver can close its connections itself. --- api/v1beta2/network_types.go | 8 +++ controllers/helpers_test.go | 8 +++ pkg/cloud/services/elb/loadbalancer.go | 32 ++++++++--- pkg/cloud/services/elb/loadbalancer_test.go | 61 +++++++++++++++++++++ 4 files changed, 101 insertions(+), 8 deletions(-) diff --git a/api/v1beta2/network_types.go b/api/v1beta2/network_types.go index 9974cbddb1..00a775a54e 100644 --- a/api/v1beta2/network_types.go +++ b/api/v1beta2/network_types.go @@ -207,6 +207,14 @@ type TargetGroupAttribute string var ( // TargetGroupAttributeEnablePreserveClientIP defines the attribute key for enabling preserve client IP. TargetGroupAttributeEnablePreserveClientIP = "preserve_client_ip.enabled" + + // TargetGroupAttributeEnableConnectionTermination defines the attribute key for terminating + // established connections to unhealthy targets. 
+ TargetGroupAttributeEnableConnectionTermination = "target_health_state.unhealthy.connection_termination.enabled" + + // TargetGroupAttributeUnhealthyDrainingIntervalSeconds defines the attribute key for the + // unhealthy target connection draining interval. + TargetGroupAttributeUnhealthyDrainingIntervalSeconds = "target_health_state.unhealthy.draining_interval_seconds" ) // LoadBalancerAttribute defines a set of attributes for a V2 load balancer. diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index d68669c407..05f103cfb6 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -393,6 +393,14 @@ func mockedModifyTargetGroupAttributes(t *testing.T, m *mocks.MockELBV2APIMockRe m.ModifyTargetGroupAttributes(gomock.Any(), gomock.Eq(&elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: tgArn, Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), diff --git a/pkg/cloud/services/elb/loadbalancer.go b/pkg/cloud/services/elb/loadbalancer.go index 25aefa9b4d..6887692dd3 100644 --- a/pkg/cloud/services/elb/loadbalancer.go +++ b/pkg/cloud/services/elb/loadbalancer.go @@ -1687,16 +1687,32 @@ func (s *Service) reconcileTargetGroupsAndListeners(ctx context.Context, lbARN s } createdTargetGroups = append(createdTargetGroups, group) + targetGroupAttributeInput := &elbv2.ModifyTargetGroupAttributesInput{TargetGroupArn: group.TargetGroupArn} + + if lbSpec.LoadBalancerType == infrav1.LoadBalancerTypeNLB { + targetGroupAttributeInput.Attributes = append(targetGroupAttributeInput.Attributes, + elbv2types.TargetGroupAttribute{ + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: 
aws.String("false"), + }, + elbv2types.TargetGroupAttribute{ + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, + ) + } + if !lbSpec.PreserveClientIP { - targetGroupAttributeInput := &elbv2.ModifyTargetGroupAttributesInput{ - TargetGroupArn: group.TargetGroupArn, - Attributes: []elbv2types.TargetGroupAttribute{ - { - Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), - Value: aws.String("false"), - }, + targetGroupAttributeInput.Attributes = append(targetGroupAttributeInput.Attributes, + elbv2types.TargetGroupAttribute{ + Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), + Value: aws.String("false"), }, - } + ) + } + + if len(targetGroupAttributeInput.Attributes) > 0 { + s.scope.Debug("configuring target group attributes", "attributes", targetGroupAttributeInput) if _, err := s.ELBV2Client.ModifyTargetGroupAttributes(ctx, targetGroupAttributeInput); err != nil { return nil, nil, errors.Wrapf(err, "failed to modify target group attribute") } diff --git a/pkg/cloud/services/elb/loadbalancer_test.go b/pkg/cloud/services/elb/loadbalancer_test.go index 805167b3b7..5bf8baae0a 100644 --- a/pkg/cloud/services/elb/loadbalancer_test.go +++ b/pkg/cloud/services/elb/loadbalancer_test.go @@ -1543,6 +1543,14 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { m.ModifyTargetGroupAttributes(gomock.Any(), gomock.Eq(&elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), @@ -1661,6 +1669,14 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { 
m.ModifyTargetGroupAttributes(gomock.Any(), &elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), @@ -1767,6 +1783,14 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { m.ModifyTargetGroupAttributes(gomock.Any(), &elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), @@ -1860,6 +1884,19 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { }, }, }, nil) + m.ModifyTargetGroupAttributes(gomock.Any(), gomock.Eq(&elbv2.ModifyTargetGroupAttributesInput{ + TargetGroupArn: aws.String(tgArn), + Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, + }, + })).Return(nil, nil) m.DescribeListeners(gomock.Any(), &elbv2.DescribeListenersInput{ LoadBalancerArn: aws.String(elbArn), }).Return(&elbv2.DescribeListenersOutput{ @@ -2000,6 +2037,14 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { m.ModifyTargetGroupAttributes(gomock.Any(), &elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: 
[]elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), @@ -2115,6 +2160,14 @@ func TestReconcileTargetGroupsAndListeners(t *testing.T) { m.ModifyTargetGroupAttributes(gomock.Any(), &elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"), @@ -2403,6 +2456,14 @@ func TestReconcileV2LB(t *testing.T) { m.ModifyTargetGroupAttributes(gomock.Any(), gomock.Eq(&elbv2.ModifyTargetGroupAttributesInput{ TargetGroupArn: aws.String(tgArn), Attributes: []elbv2types.TargetGroupAttribute{ + { + Key: aws.String(infrav1.TargetGroupAttributeEnableConnectionTermination), + Value: aws.String("false"), + }, + { + Key: aws.String(infrav1.TargetGroupAttributeUnhealthyDrainingIntervalSeconds), + Value: aws.String("300"), + }, { Key: aws.String(infrav1.TargetGroupAttributeEnablePreserveClientIP), Value: aws.String("false"),