diff --git a/lib/tbot/readyz/readyz.go b/lib/tbot/readyz/readyz.go index 4462c7c1fe4bc..7682c56483b0c 100644 --- a/lib/tbot/readyz/readyz.go +++ b/lib/tbot/readyz/readyz.go @@ -24,6 +24,7 @@ import "sync" func NewRegistry() *Registry { return &Registry{ services: make(map[string]*ServiceStatus), + notifyCh: make(chan struct{}), } } @@ -31,6 +32,8 @@ func NewRegistry() *Registry { type Registry struct { mu sync.Mutex services map[string]*ServiceStatus + reported int + notifyCh chan struct{} } // AddService adds a service to the registry so that its health will be reported @@ -45,9 +48,11 @@ func (r *Registry) AddService(name string) Reporter { status = &ServiceStatus{} r.services[name] = status } + return &reporter{ mu: &r.mu, status: status, + notify: sync.OnceFunc(r.maybeNotifyLocked), } } @@ -87,6 +92,31 @@ func (r *Registry) OverallStatus() *OverallStatus { } } +// AllServicesReported returns a channel you can receive from to be notified +// when all registered services have reported their initial status. It provides +// a way for us to hold the initial heartbeat until after the initial flurry of +// activity. +func (r *Registry) AllServicesReported() <-chan struct{} { return r.notifyCh } + +// maybeNotifyLocked unblocks the AllServicesReported channel if all services +// have reported their initial status. It's called by each of the Reporters the +// first time you report a status. +// +// Caller must be holding r.mu. +func (r *Registry) maybeNotifyLocked() { + r.reported++ + + if r.reported != len(r.services) { + return + } + + select { + case <-r.notifyCh: + default: + close(r.notifyCh) + } +} + // ServiceStatus is a snapshot of the service's status. type ServiceStatus struct { // Status of the service. diff --git a/lib/tbot/readyz/readyz_test.go b/lib/tbot/readyz/readyz_test.go index 40641ab9d4a01..c37e73c0bc610 100644 --- a/lib/tbot/readyz/readyz_test.go +++ b/lib/tbot/readyz/readyz_test.go @@ -146,3 +146,32 @@ func TestReadyz(t *testing.T) { require.Equal(t, http.StatusNotFound, rsp.StatusCode) }) } + +func TestAllServicesReported(t *testing.T) { + reg := readyz.NewRegistry() + + a := reg.AddService("a") + b := reg.AddService("b") + + select { + case <-reg.AllServicesReported(): + t.Fatal("AllServicesReported should be blocked") + default: + } + + a.Report(readyz.Healthy) + + select { + case <-reg.AllServicesReported(): + t.Fatal("AllServicesReported should be blocked") + default: + } + + b.Report(readyz.Unhealthy) + + select { + case <-reg.AllServicesReported(): + default: + t.Fatal("AllServicesReported should not be blocked") + } +} diff --git a/lib/tbot/readyz/reporter.go b/lib/tbot/readyz/reporter.go index 3620bb96ef3fb..876f29186e020 100644 --- a/lib/tbot/readyz/reporter.go +++ b/lib/tbot/readyz/reporter.go @@ -32,6 +32,7 @@ type Reporter interface { type reporter struct { mu *sync.Mutex status *ServiceStatus + notify func() } func (r *reporter) Report(status Status) { @@ -44,6 +45,7 @@ func (r *reporter) ReportReason(status Status, reason string) { r.status.Status = status r.status.Reason = reason + r.notify() } // NoopReporter returns a no-op Reporter that can be used when no real reporter