Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go/vt/vttablet/tabletserver/repltracker/repltracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func (rt *ReplTracker) MakePrimary() {
rt.hw.Open()
}
rt.hw.Open()
replicationLagSeconds.Reset() // we are the primary, we have no lag
}

// MakeNonPrimary must be called if the tablet type becomes non-PRIMARY.
Expand Down Expand Up @@ -130,6 +131,7 @@ func (rt *ReplTracker) Status() (time.Duration, error) {

switch {
case rt.isPrimary || rt.mode == tabletenv.Disable:
replicationLagSeconds.Reset() // primary, or lag tracking is disabled: there is no lag to report
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could go either way on whether this call is necessary in addition to the first one. I don't think it's incorrect (both clauses imply we don't want these metrics), so I'm fine with keeping it or dropping it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd remove this one, but don't feel strongly about it.

return 0, nil
case rt.mode == tabletenv.Heartbeat:
return rt.hr.Status()
Expand Down
49 changes: 49 additions & 0 deletions go/vt/vttablet/tabletserver/repltracker/repltracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,53 @@ func TestReplTracker(t *testing.T) {
assert.False(t, rt.hw.isOpen)
assert.False(t, rt.hr.isOpen)
})
t.Run("metric reset on promotion", func(t *testing.T) {
Copy link
Member

@mattlord mattlord Oct 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confirmed that the new subtests do indeed fail as expected on main:

Running tool: /usr/local/go/bin/go test -timeout 30s -run ^TestReplTracker$ vitess.io/vitess/go/vt/vttablet/tabletserver/repltracker

--- FAIL: TestReplTracker (0.00s)
    --- FAIL: TestReplTracker/metric_reset_on_promotion (0.00s)
        /Users/matt/git/vitess/go/vt/vttablet/tabletserver/repltracker/repltracker_test.go:159: 
            	Error Trace:	/Users/matt/git/vitess/go/vt/vttablet/tabletserver/repltracker/repltracker_test.go:159
            	Error:      	Not equal: 
            	            	expected: 0
            	            	actual  : 42
            	Test:       	TestReplTracker/metric_reset_on_promotion
    --- FAIL: TestReplTracker/metric_reset_on_status_when_primary (0.00s)
        /Users/matt/git/vitess/go/vt/vttablet/tabletserver/repltracker/repltracker_test.go:184: 
            	Error Trace:	/Users/matt/git/vitess/go/vt/vttablet/tabletserver/repltracker/repltracker_test.go:184
            	Error:      	Not equal: 
            	            	expected: 0
            	            	actual  : 99
            	Test:       	TestReplTracker/metric_reset_on_status_when_primary
FAIL
FAIL	vitess.io/vitess/go/vt/vttablet/tabletserver/repltracker	0.819s
FAIL

// Clean up the global metric after test
defer replicationLagSeconds.Reset()

rt := NewReplTracker(env, alias)
rt.InitDBConfig(target, mysqld)

// Start as replica
rt.MakeNonPrimary()
assert.False(t, rt.isPrimary)

// Simulate having lag (would normally be set by poller)
replicationLagSeconds.Set(42)
assert.Equal(t, int64(42), replicationLagSeconds.Get())

// Promote to primary
rt.MakePrimary()
assert.True(t, rt.isPrimary)

// Verify metric is reset
assert.Equal(t, int64(0), replicationLagSeconds.Get())

rt.Close()
})
t.Run("metric reset on status when primary", func(t *testing.T) {
// Clean up the global metric after test
defer replicationLagSeconds.Reset()

rt := NewReplTracker(env, alias)
rt.InitDBConfig(target, mysqld)

// Set as primary
rt.MakePrimary()
assert.True(t, rt.isPrimary)

// Simulate metric having a stale value (shouldn't happen, but be defensive)
replicationLagSeconds.Set(99)
assert.Equal(t, int64(99), replicationLagSeconds.Get())

// Call Status() which should reset the metric
lag, err := rt.Status()
assert.NoError(t, err)
assert.Equal(t, time.Duration(0), lag)

// Verify metric is reset
assert.Equal(t, int64(0), replicationLagSeconds.Get())

rt.Close()
})
}
Loading