Skip to content

Commit 96d3e33

Browse files
p0lyn0mialbertinatto
authored andcommitted
UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready
the availability checks depend on fully initialized SDN OpenShift carries a few reachability checks that affect /readyz protocol we skip posting failures to avoid getting false positives until the server becomes ready UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready marks availability of the server before checking the aggregate APIs as it can change as we are running the checks. in that case, skip posting failures to avoid false positives. note on the next rebase please squash with the previous commit UPSTREAM: <carry>: expose HasBeenReady lifecycle signal OpenShift-Rebase-Source: 8558e88
1 parent 7d0b914 commit 96d3e33

File tree

4 files changed

+36
-1
lines changed

4 files changed

+36
-1
lines changed

staging/src/k8s.io/apiserver/pkg/server/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,11 @@ func (c *Config) ShutdownInitiatedNotify() <-chan struct{} {
701701
return c.lifecycleSignals.ShutdownInitiated.Signaled()
702702
}
703703

704+
// HasBeenReadySignal exposes a server's lifecycle signal which is signaled when the readyz endpoint succeeds for the first time.
705+
func (c *Config) HasBeenReadySignal() <-chan struct{} {
706+
return c.lifecycleSignals.HasBeenReady.Signaled()
707+
}
708+
704709
// Complete fills in any fields not set that are required to have valid data and can be derived
705710
// from other fields. If you're going to `ApplyOptions`, do that first. It's mutating the receiver.
706711
func (c *Config) Complete(informers informers.SharedInformerFactory) CompletedConfig {

staging/src/k8s.io/kube-aggregator/pkg/apiserver/apiserver.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@ func (c completedConfig) NewWithDelegate(delegationTarget genericapiserver.Deleg
348348
(func() ([]byte, []byte))(s.proxyCurrentCertKeyContent),
349349
s.serviceResolver,
350350
metrics,
351+
c.GenericConfig.HasBeenReadySignal(),
351352
)
352353
if err != nil {
353354
return nil, err

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/remote/remote_available_controller.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ type AvailableConditionController struct {
8787

8888
// metrics registered into legacy registry
8989
metrics *availabilitymetrics.Metrics
90+
91+
// hasBeenReady is signaled when the readyz endpoint succeeds for the first time.
92+
hasBeenReady <-chan struct{}
9093
}
9194

9295
// New returns a new remote APIService AvailableConditionController.
@@ -99,6 +102,7 @@ func New(
99102
proxyCurrentCertKeyContent certKeyFunc,
100103
serviceResolver ServiceResolver,
101104
metrics *availabilitymetrics.Metrics,
105+
hasBeenReady <-chan struct{},
102106
) (*AvailableConditionController, error) {
103107

104108
endpointSliceGetter, err := proxy.NewEndpointSliceIndexerGetter(endpointSliceInformer)
@@ -122,6 +126,7 @@ func New(
122126
proxyTransportDial: proxyTransportDial,
123127
proxyCurrentCertKeyContent: proxyCurrentCertKeyContent,
124128
metrics: metrics,
129+
hasBeenReady: hasBeenReady,
125130
}
126131

127132
// resync on this one because it is low cardinality and rechecking the actual discovery
@@ -171,6 +176,18 @@ func (c *AvailableConditionController) sync(key string) error {
171176
return nil
172177
}
173178

179+
// the availability checks depend on fully initialized SDN
180+
// OpenShift carries a few reachability checks that affect /readyz protocol
181+
// record availability of the server so that we can
182+
// skip posting failures to avoid getting false positives until the server becomes ready
183+
hasBeenReady := false
184+
select {
185+
case <-c.hasBeenReady:
186+
hasBeenReady = true
187+
default:
188+
// continue, we will skip posting only potential failures
189+
}
190+
174191
apiService := originalAPIService.DeepCopy()
175192

176193
// if a particular transport was specified, use that otherwise build one
@@ -361,6 +378,11 @@ func (c *AvailableConditionController) sync(key string) error {
361378
}
362379

363380
if lastError != nil {
381+
if !hasBeenReady {
382+
// returning an error will requeue the item in an exponential fashion
383+
return fmt.Errorf("the server hasn't been ready yet, skipping updating availability of the aggreaged API until the server becomes ready to avoid false positives, lastError = %v", lastError)
384+
}
385+
364386
availableCondition.Status = apiregistrationv1.ConditionFalse
365387
availableCondition.Reason = "FailedDiscoveryCheck"
366388
availableCondition.Message = lastError.Error()

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/remote/remote_available_controller_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
148148
}
149149
}
150150

151+
alwaysReadyChan := make(chan struct{})
152+
close(alwaysReadyChan)
153+
151154
c := AvailableConditionController{
152155
apiServiceClient: fakeClient.ApiregistrationV1(),
153156
apiServiceLister: listers.NewAPIServiceLister(apiServiceIndexer),
@@ -161,7 +164,8 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
161164
workqueue.NewTypedItemExponentialFailureRateLimiter[string](5*time.Millisecond, 30*time.Second),
162165
workqueue.TypedRateLimitingQueueConfig[string]{Name: "AvailableConditionController"},
163166
),
164-
metrics: availabilitymetrics.New(),
167+
metrics: availabilitymetrics.New(),
168+
hasBeenReady: alwaysReadyChan,
165169
}
166170
for _, svc := range apiServices {
167171
c.addAPIService(svc)
@@ -439,6 +443,8 @@ func TestSync(t *testing.T) {
439443
w.WriteHeader(tc.backendStatus)
440444
}))
441445
defer testServer.Close()
446+
alwaysReadyChan := make(chan struct{})
447+
close(alwaysReadyChan)
442448

443449
c := AvailableConditionController{
444450
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -448,6 +454,7 @@ func TestSync(t *testing.T) {
448454
serviceResolver: &fakeServiceResolver{url: testServer.URL},
449455
proxyCurrentCertKeyContent: func() ([]byte, []byte) { return emptyCert(), emptyCert() },
450456
metrics: availabilitymetrics.New(),
457+
hasBeenReady: alwaysReadyChan,
451458
}
452459
err = c.sync(tc.apiServiceName)
453460
if tc.expectedSyncError != "" {

0 commit comments

Comments
 (0)