From dab6fc30422a29c8f4f4e8c1c901c5e4a806f19c Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 18:12:40 +0200 Subject: [PATCH] Fix `vttablet` not being marked as not serving when MySQL stalls (#17883) Signed-off-by: Arthur Schreiber --- go/vt/vttablet/tabletserver/repltracker/reader.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/go/vt/vttablet/tabletserver/repltracker/reader.go b/go/vt/vttablet/tabletserver/repltracker/reader.go index b50e5e4b2c7..e1cf6324a25 100644 --- a/go/vt/vttablet/tabletserver/repltracker/reader.go +++ b/go/vt/vttablet/tabletserver/repltracker/reader.go @@ -60,6 +60,7 @@ type heartbeatReader struct { lagMu sync.Mutex lastKnownLag time.Duration + lastKnownTime time.Time lastKnownError error } @@ -103,6 +104,7 @@ func (r *heartbeatReader) Open() { log.Info("Heartbeat Reader: opening") r.pool.Open(r.env.Config().DB.AppWithDB(), r.env.Config().DB.DbaWithDB(), r.env.Config().DB.AppDebugWithDB()) + r.lastKnownTime = r.now() r.ticks.Start(func() { r.readHeartbeat() }) r.isOpen = true } @@ -130,9 +132,16 @@ func (r *heartbeatReader) Close() { func (r *heartbeatReader) Status() (time.Duration, error) { r.lagMu.Lock() defer r.lagMu.Unlock() + if r.lastKnownError != nil { return 0, r.lastKnownError } + + // Return an error if we didn't receive a heartbeat for more than two seconds + if !r.lastKnownTime.IsZero() && r.now().Sub(r.lastKnownTime) > 2*r.interval { + return 0, fmt.Errorf("no heartbeat received in over 2x the heartbeat interval") + } + return r.lastKnownLag, nil } @@ -162,6 +171,7 @@ func (r *heartbeatReader) readHeartbeat() { reads.Add(1) r.lagMu.Lock() + r.lastKnownTime = r.now() r.lastKnownLag = lag r.lastKnownError = nil r.lagMu.Unlock()