Skip to content

Commit 469d2bd

Browse files
ingvagabundopenshift-cherrypick-robot
authored andcommitted
UPSTREAM: <carry>: disable etcd readiness checks by default
Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177) and have etcd operator take responsibility for properly reporting etcd readiness. Justification: kube-apiserver instances get removed from a load balancer when etcd starts to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness longer than the readiness timeout is. Thus, it is not necessary to drop connections in case etcd resumes its readiness before a client connection times out naturally. This is a downstream patch only as OpenShift's way of using etcd is unique.
1 parent e18ecb4 commit 469d2bd

File tree

4 files changed

+34
-6
lines changed

4 files changed

+34
-6
lines changed

staging/src/k8s.io/apiserver/pkg/server/config.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,13 +574,27 @@ type CompletedConfig struct {
574574
func (c *Config) AddHealthChecks(healthChecks ...healthz.HealthChecker) {
575575
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
576576
c.LivezChecks = append(c.LivezChecks, healthChecks...)
577-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
577+
c.AddReadyzChecks(healthChecks...)
578578
}
579579

580580
// AddReadyzChecks adds a health check to our config to be exposed by the readyz endpoint
581581
// of our configured apiserver.
582582
func (c *Config) AddReadyzChecks(healthChecks ...healthz.HealthChecker) {
583-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
583+
// Info(ingvagabund): Explicitly exclude etcd and etcd-readiness checks (OCPBUGS-48177)
584+
// and have etcd operator take responsibility for properly reporting etcd readiness.
585+
// Justification: kube-apiserver instances get removed from a load balancer when etcd starts
586+
// to report not ready (as will KA's /readyz). Client connections can withstand etcd unreadiness
587+
// longer than the readiness timeout is. Thus, it is not necessary to drop connections
588+
// in case etcd resumes its readiness before a client connection times out naturally.
589+
// This is a downstream patch only as OpenShift's way of using etcd is unique.
590+
readyzChecks := []healthz.HealthChecker{}
591+
for _, check := range healthChecks {
592+
if check.Name() == "etcd" || check.Name() == "etcd-readiness" {
593+
continue
594+
}
595+
readyzChecks = append(readyzChecks, check)
596+
}
597+
c.ReadyzChecks = append(c.ReadyzChecks, readyzChecks...)
584598
}
585599

586600
// AddPostStartHook allows you to add a PostStartHook that will later be added to the server itself in a New call.

staging/src/k8s.io/apiserver/pkg/server/options/etcd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ func (s *EtcdOptions) maybeApplyResourceTransformers(c *server.Config) (err erro
355355

356356
func addHealthChecksWithoutLivez(c *server.Config, healthChecks ...healthz.HealthChecker) {
357357
c.HealthzChecks = append(c.HealthzChecks, healthChecks...)
358-
c.ReadyzChecks = append(c.ReadyzChecks, healthChecks...)
358+
c.AddReadyzChecks(healthChecks...)
359359
}
360360

361361
func (s *EtcdOptions) addEtcdHealthEndpoint(c *server.Config) error {

staging/src/k8s.io/apiserver/pkg/server/options/etcd_test.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,17 @@ func TestParseWatchCacheSizes(t *testing.T) {
262262
}
263263
}
264264

265+
func excludeEtcdReadyzChecks(readyzChecks []string) []string {
266+
includedReadyzChecks := []string{}
267+
for _, checkName := range readyzChecks {
268+
if checkName == "etcd" || checkName == "etcd-readiness" {
269+
continue
270+
}
271+
includedReadyzChecks = append(includedReadyzChecks, checkName)
272+
}
273+
return includedReadyzChecks
274+
}
275+
265276
func TestKMSHealthzEndpoint(t *testing.T) {
266277
featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KMSv1, true)
267278

@@ -367,7 +378,9 @@ func TestKMSHealthzEndpoint(t *testing.T) {
367378
}
368379

369380
healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
370-
healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
381+
// Remove the excluded checks here to reduce the carry patch changes in case
382+
// the changes drifts too much during rebases and similar scope-like changes.
383+
healthChecksAreEqual(t, excludeEtcdReadyzChecks(tc.wantReadyzChecks), serverConfig.ReadyzChecks, "readyz")
371384
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
372385
})
373386
}
@@ -407,7 +420,9 @@ func TestReadinessCheck(t *testing.T) {
407420
t.Fatalf("Failed to add healthz error: %v", err)
408421
}
409422

410-
healthChecksAreEqual(t, tc.wantReadyzChecks, serverConfig.ReadyzChecks, "readyz")
423+
// Remove the excluded checks here to reduce the carry patch changes in case
424+
// the changes drifts too much during rebases and similar scope-like changes.
425+
healthChecksAreEqual(t, excludeEtcdReadyzChecks(tc.wantReadyzChecks), serverConfig.ReadyzChecks, "readyz")
411426
healthChecksAreEqual(t, tc.wantHealthzChecks, serverConfig.HealthzChecks, "healthz")
412427
healthChecksAreEqual(t, tc.wantLivezChecks, serverConfig.LivezChecks, "livez")
413428
})

test/e2e/apimachinery/health_handlers.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ var (
8282
requiredReadyzChecks = sets.NewString(
8383
"[+]ping ok",
8484
"[+]log ok",
85-
"[+]etcd ok",
8685
"[+]informer-sync ok",
8786
"[+]poststarthook/start-apiserver-admission-initializer ok",
8887
"[+]poststarthook/generic-apiserver-start-informers ok",

0 commit comments

Comments
 (0)