-
Notifications
You must be signed in to change notification settings - Fork 259
events: make kube broadcaster shut down gracefully and tune correlator so we don't lose events #777
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ import ( | |
| "sync" | ||
| "time" | ||
|
|
||
| "k8s.io/client-go/tools/record" | ||
| "k8s.io/component-base/metrics" | ||
| "k8s.io/component-base/metrics/legacyregistry" | ||
| "k8s.io/klog" | ||
|
|
@@ -64,6 +65,7 @@ type ControllerBuilder struct { | |
| leaderElection *configv1.LeaderElection | ||
| fileObserver fileobserver.Observer | ||
| fileObserverReactorFn func(file string, action fileobserver.ActionType) error | ||
| eventRecorderOptions record.CorrelatorOptions | ||
|
|
||
| startFunc StartFunc | ||
| componentName string | ||
|
|
@@ -175,6 +177,14 @@ func (b *ControllerBuilder) WithInstanceIdentity(identity string) *ControllerBui | |
| return b | ||
| } | ||
|
|
||
| // WithEventRecorderOptions allows overriding the default Kubernetes event recorder correlator options. | ||
| // This is needed if the binary is sending a lot of events. | ||
| // Using events.DefaultOperatorEventRecorderOptions here makes a good default for a normal operator binary. | ||
| func (b *ControllerBuilder) WithEventRecorderOptions(options record.CorrelatorOptions) *ControllerBuilder { | ||
| b.eventRecorderOptions = options | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I expected to set it to the recommended options by default. Is that the case?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess https://github.com/openshift/library-go/pull/777/files#diff-ff937793ae2933db156923ac8eaebbd8R271 is doing this, right? |
||
| return b | ||
| } | ||
|
|
||
| // Run starts your controller for you. It uses leader election if you asked, otherwise it directly calls you | ||
| func (b *ControllerBuilder) Run(ctx context.Context, config *unstructured.Unstructured) error { | ||
| clientConfig, err := b.getClientConfig() | ||
|
|
@@ -195,7 +205,7 @@ func (b *ControllerBuilder) Run(ctx context.Context, config *unstructured.Unstru | |
| if err != nil { | ||
| klog.Warningf("unable to get owner reference (falling back to namespace): %v", err) | ||
| } | ||
| eventRecorder := events.NewKubeRecorder(kubeClient.CoreV1().Events(namespace), b.componentName, controllerRef) | ||
| eventRecorder := events.NewKubeRecorderWithOptions(kubeClient.CoreV1().Events(namespace), b.eventRecorderOptions, b.componentName, controllerRef) | ||
|
|
||
| // if there is file observer defined for this command, add event into default reaction function. | ||
| if b.fileObserverReactorFn != nil { | ||
|
|
@@ -298,6 +308,7 @@ func (b ControllerBuilder) getOnStartedLeadingFunc(controllerContext *Controller | |
|
|
||
| select { | ||
| case <-ctx.Done(): // context closed means the process likely received signal to terminate | ||
| controllerContext.EventRecorder.Shutdown() | ||
| case <-stoppedCh: | ||
| // if context was not cancelled (it is not "done"), but the startFunc terminated, it means it terminated prematurely | ||
| // when this happen, it means the controllers terminated without error. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,45 +2,120 @@ package events | |
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "k8s.io/klog" | ||
| "strings" | ||
| "sync" | ||
|
|
||
| corev1 "k8s.io/api/core/v1" | ||
| "k8s.io/client-go/kubernetes/scheme" | ||
| corev1client "k8s.io/client-go/kubernetes/typed/core/v1" | ||
| "k8s.io/client-go/tools/record" | ||
| "k8s.io/component-base/metrics" | ||
| "k8s.io/component-base/metrics/legacyregistry" | ||
| "k8s.io/klog" | ||
| ) | ||
|
|
||
| // NewKubeRecorder returns new event recorder. | ||
| func NewKubeRecorder(client corev1client.EventInterface, sourceComponentName string, involvedObjectRef *corev1.ObjectReference) Recorder { | ||
| // NewKubeRecorderWithOptions returns a new event recorder that uses the given correlator options. | ||
| // The client, options, component name and involved object reference are stored on an upstreamRecorder, | ||
| // together with a fallback recorder built by NewRecorder from the same client, name and reference. | ||
| // The value handed back to the caller is the result of ForComponent(sourceComponentName) on that recorder. | ||
| func NewKubeRecorderWithOptions(client corev1client.EventInterface, options record.CorrelatorOptions, sourceComponentName string, involvedObjectRef *corev1.ObjectReference) Recorder { | ||
| return (&upstreamRecorder{ | ||
| client: client, | ||
| component: sourceComponentName, | ||
| involvedObjectRef: involvedObjectRef, | ||
| options: options, | ||
| fallbackRecorder: NewRecorder(client, sourceComponentName, involvedObjectRef), | ||
| }).ForComponent(sourceComponentName) | ||
| } | ||
|
|
||
| // NewKubeRecorder returns new event recorder with default correlator options. | ||
| // It delegates to NewKubeRecorderWithOptions, passing a zero-value record.CorrelatorOptions{}. | ||
| // NOTE(review): presumably client-go fills in its own defaults for unset option fields - TODO confirm. | ||
| func NewKubeRecorder(client corev1client.EventInterface, sourceComponentName string, involvedObjectRef *corev1.ObjectReference) Recorder { | ||
| return NewKubeRecorderWithOptions(client, record.CorrelatorOptions{}, sourceComponentName, involvedObjectRef) | ||
| } | ||
|
|
||
| // upstreamRecorder is an implementation of Recorder interface. | ||
| type upstreamRecorder struct { | ||
| client corev1client.EventInterface | ||
| component string | ||
| broadcaster record.EventBroadcaster | ||
| eventRecorder record.EventRecorder | ||
| involvedObjectRef *corev1.ObjectReference | ||
| options record.CorrelatorOptions | ||
|
|
||
| // shuttingDown indicates that the broadcaster for this recorder is being shut down | ||
| shuttingDown bool | ||
| shutdownMutex sync.RWMutex | ||
|
|
||
| // fallbackRecorder is used when the kube recorder is shutting down | ||
| // in that case we create the events directly. | ||
| fallbackRecorder Recorder | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh what a wicked web we weave
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can just log the events that come after shutdown... I think the chance we leak events after shutdown is triggered is really small (the window is basically ~1-2s), but this makes sure we don't miss any event at all... |
||
| } | ||
|
|
||
| // RecommendedClusterSingletonCorrelatorOptions provides recommended event correlator options for components that produce | ||
| // many events (like operators). | ||
| // Only BurstSize, QPS and KeyFunc are set; every other field of record.CorrelatorOptions is left at its zero value. | ||
| func RecommendedClusterSingletonCorrelatorOptions() record.CorrelatorOptions { | ||
| return record.CorrelatorOptions{ | ||
| BurstSize: 60, // default: 25 (raised so a single source can send more events about an object; NOTE(review): this comment used to say 50 events per minute while the value is 60 - confirm the intended figure) | ||
| QPS: 1. / 1., // default: 1/300 (change allows refill rate to 1 new event every 1s) | ||
| // KeyFunc returns two keys: the aggregate key is the listed fields joined with no separator, | ||
| // and the local key is the event message on its own. | ||
| KeyFunc: func(event *corev1.Event) (aggregateKey string, localKey string) { | ||
| return strings.Join([]string{ | ||
| event.Source.Component, | ||
| event.Source.Host, | ||
| event.InvolvedObject.Kind, | ||
| event.InvolvedObject.Namespace, | ||
| event.InvolvedObject.Name, | ||
| string(event.InvolvedObject.UID), | ||
| event.InvolvedObject.APIVersion, | ||
| event.Type, | ||
| event.Reason, | ||
| // By default, KeyFunc does not use the message for aggregation. Including it here keeps events that share a reason but carry different messages from being lost as "similar events". | ||
| event.Message, | ||
| }, ""), event.Message | ||
| }, | ||
| } | ||
| } | ||
|
|
||
| // eventsCounterMetric is a counter vector with a single "severity" label; it is incremented by | ||
| // incrementEventsCounter with the event type passed from Event and Warning. | ||
| // NOTE(review): the help text says "per involved object" but the only label is severity - confirm the wording. | ||
| var eventsCounterMetric = metrics.NewCounterVec(&metrics.CounterOpts{ | ||
| Subsystem: "event_recorder", | ||
| Name: "total_events_count", | ||
| Help: "Total count of events processed by this event recorder per involved object", | ||
| StabilityLevel: metrics.ALPHA, | ||
| }, []string{"severity"}) | ||
|
|
||
| // init registers eventsCounterMetric with the legacy metrics registry. | ||
| // NOTE(review): the sync.Once is constructed inline for this single call, so it adds no | ||
| // protection beyond what init already gives (it runs once per package) - consider dropping it. | ||
| func init() { | ||
| (&sync.Once{}).Do(func() { | ||
| legacyregistry.MustRegister(eventsCounterMetric) | ||
| }) | ||
| } | ||
|
|
||
| // ForComponent returns a new recorder whose event source component is componentName. | ||
| // The client, correlator options and involved object reference are copied from r, the fallback | ||
| // recorder is derived with WithComponentSuffix(componentName), and a fresh broadcaster is started | ||
| // for the new recorder. Because each returned recorder owns its own broadcaster and its own | ||
| // shuttingDown flag, calling Shutdown on one recorder does not touch recorders derived from it. | ||
| // NOTE(review): r.shuttingDown is read below without holding shutdownMutex - confirm this cannot | ||
| // race with a concurrent Shutdown call. | ||
| func (r *upstreamRecorder) ForComponent(componentName string) Recorder { | ||
| // NOTE(review): the next two lines look like the removed side of the diff; both names are declared again below. | ||
| newRecorderForComponent := *r | ||
| broadcaster := record.NewBroadcaster() | ||
| newRecorderForComponent := upstreamRecorder{ | ||
| client: r.client, | ||
| fallbackRecorder: r.fallbackRecorder.WithComponentSuffix(componentName), | ||
| options: r.options, | ||
| involvedObjectRef: r.involvedObjectRef, | ||
| shuttingDown: r.shuttingDown, | ||
| } | ||
|
|
||
| // tweak the event correlator, so we don't lose important events. | ||
| broadcaster := record.NewBroadcasterWithCorrelatorOptions(r.options) | ||
| // events are both logged through klog.Infof and recorded to the sink backed by the recorder's client | ||
| broadcaster.StartLogging(klog.Infof) | ||
| broadcaster.StartRecordingToSink(&corev1client.EventSinkImpl{Interface: newRecorderForComponent.client}) | ||
|
|
||
| newRecorderForComponent.eventRecorder = broadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: componentName}) | ||
| newRecorderForComponent.broadcaster = broadcaster | ||
| newRecorderForComponent.component = componentName | ||
|
|
||
| return &newRecorderForComponent | ||
| } | ||
|
|
||
| // Shutdown marks this recorder as shutting down and then shuts down its broadcaster. | ||
| // The flag is set under the write lock; once it is set, Event and Warning send through the | ||
| // fallback recorder instead of the broadcaster-backed recorder. | ||
| func (r *upstreamRecorder) Shutdown() { | ||
| r.shutdownMutex.Lock() | ||
| r.shuttingDown = true | ||
| r.shutdownMutex.Unlock() | ||
| // Wait for broadcaster to flush events (this is blocking) | ||
| // TODO: There is still race condition in upstream that might cause panic() on events recorded after the shutdown | ||
| // is called as the event recording is not-blocking (go routine based). | ||
| r.broadcaster.Shutdown() | ||
| } | ||
|
|
||
| // WithComponentSuffix returns a recorder created by ForComponent for the name | ||
| // "<r.ComponentName()>-<suffix>". | ||
| func (r *upstreamRecorder) WithComponentSuffix(suffix string) Recorder { | ||
| return r.ForComponent(fmt.Sprintf("%s-%s", r.ComponentName(), suffix)) | ||
| } | ||
|
|
@@ -59,12 +134,33 @@ func (r *upstreamRecorder) Warningf(reason, messageFmt string, args ...interface | |
| r.Warning(reason, fmt.Sprintf(messageFmt, args...)) | ||
| } | ||
|
|
||
| // incrementEventsCounter increments eventsCounterMetric for the given severity label. | ||
| // Nothing is counted when the recorder has no involved object reference. | ||
| func (r *upstreamRecorder) incrementEventsCounter(severity string) { | ||
| if r.involvedObjectRef == nil { | ||
| return | ||
| } | ||
| eventsCounterMetric.WithLabelValues(severity).Inc() | ||
| } | ||
|
|
||
| // Event emits the normal type event. | ||
| // The read side of shutdownMutex is held for the whole call. While the recorder is shutting down | ||
| // the event is sent through fallbackRecorder; otherwise it goes to the broadcaster-backed | ||
| // eventRecorder against involvedObjectRef. incrementEventsCounter runs on return in both cases. | ||
| func (r *upstreamRecorder) Event(reason, message string) { | ||
| r.shutdownMutex.RLock() | ||
| defer r.shutdownMutex.RUnlock() | ||
| defer r.incrementEventsCounter(corev1.EventTypeNormal) | ||
| if r.shuttingDown { | ||
| r.fallbackRecorder.Event(reason, message) | ||
| return | ||
| } | ||
| r.eventRecorder.Event(r.involvedObjectRef, corev1.EventTypeNormal, reason, message) | ||
| } | ||
|
|
||
| // Warning emits the warning type event. | ||
| // The read side of shutdownMutex is held for the whole call. While the recorder is shutting down | ||
| // the event is sent through fallbackRecorder; otherwise it goes to the broadcaster-backed | ||
| // eventRecorder against involvedObjectRef. incrementEventsCounter runs on return in both cases. | ||
| func (r *upstreamRecorder) Warning(reason, message string) { | ||
| r.shutdownMutex.RLock() | ||
| defer r.shutdownMutex.RUnlock() | ||
| defer r.incrementEventsCounter(corev1.EventTypeWarning) | ||
| if r.shuttingDown { | ||
| r.fallbackRecorder.Warning(reason, message) | ||
| return | ||
| } | ||
| r.eventRecorder.Event(r.involvedObjectRef, corev1.EventTypeWarning, reason, message) | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.