Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions go/vt/vttablet/tabletmanager/semisyncmonitor/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ type Monitor struct {
// the semisync_heartbeat table.
clearTicks *timer.Timer

// timerMu protects operations on the timers to prevent deadlocks
// This must be acquired before mu if both are needed.
timerMu sync.Mutex

// mu protects the fields below.
mu sync.Mutex
appPool *dbconnpool.ConnectionPool
Expand Down Expand Up @@ -123,8 +127,13 @@ func CreateTestSemiSyncMonitor(db *fakesqldb.DB, exporter *servenv.Exporter) *Mo

// Open starts the monitor.
func (m *Monitor) Open() {
// First acquire the timer mutex to prevent deadlock - we acquire in a consistent order
m.timerMu.Lock()
defer m.timerMu.Unlock()

m.mu.Lock()
defer m.mu.Unlock()

// The check for config being nil is only requried for tests.
if m.isOpen || m.config == nil || m.config.DB == nil {
// If we are already open, then there is nothing to do
Expand All @@ -145,16 +154,19 @@ func (m *Monitor) Open() {

// Close stops the monitor.
func (m *Monitor) Close() {
m.mu.Lock()
defer m.mu.Unlock()
if !m.isOpen {
// If we are already closed, then there is nothing to do
return
}
m.isOpen = false
// First acquire the timer mutex to prevent deadlock - we acquire in a consistent order
m.timerMu.Lock()
defer m.timerMu.Unlock()
log.Info("SemiSync Monitor: closing")
// We close the ticks before we acquire the mu mutex to prevent deadlock.
// The timer Close should not be called while holding a mutex that the function
// the timer runs also acquires.
m.clearTicks.Stop()
m.ticks.Stop()
// Acquire the mu mutex to update the isOpen field.
m.mu.Lock()
defer m.mu.Unlock()
m.isOpen = false
m.appPool.Close()
}

Expand Down
47 changes: 47 additions & 0 deletions go/vt/vttablet/tabletmanager/semisyncmonitor/monitor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,53 @@ func TestWaitUntilSemiSyncUnblocked(t *testing.T) {
require.True(t, m.isClosed())
}

// TestDeadlockOnClose tests the deadlock that can occur when calling Close().
// Look at https://github.com/vitessio/vitess/issues/18275 for more details.
func TestDeadlockOnClose(t *testing.T) {
db := fakesqldb.New(t)
defer db.Close()
params := db.ConnParams()
cp := *params
dbc := dbconfigs.NewTestDBConfigs(cp, cp, "")
config := &tabletenv.TabletConfig{
DB: dbc,
SemiSyncMonitor: tabletenv.SemiSyncMonitorConfig{
// Extremely low interval to trigger the deadlock quickly.
// This makes the monitor try and write that it is still blocked quite aggressively.
Interval: 10 * time.Millisecond,
},
}
m := NewMonitor(config, exporter)

// Set up for semisync to be blocked
db.SetNeverFail(true)
db.AddQuery(semiSyncWaitSessionsRead, sqltypes.MakeTestResult(sqltypes.MakeTestFields("variable_value", "varchar"), "1"))

// Open the monitor
m.Open()
defer m.Close()

// We will now try to close and open the monitor multiple times to see if we can trigger a deadlock.
finishCh := make(chan int)
go func() {
count := 100
for i := 0; i < count; i++ {
m.Close()
m.Open()
time.Sleep(20 * time.Millisecond)
}
close(finishCh)
}()

select {
case <-finishCh:
// The test finished without deadlocking.
case <-time.After(5 * time.Second):
// The test timed out, which means we deadlocked.
t.Fatalf("Deadlock occurred while closing the monitor")
}
}

// TestSemiSyncMonitor tests the semi-sync monitor as a black box.
// It only calls the exported methods to see they work as intended.
func TestSemiSyncMonitor(t *testing.T) {
Expand Down
Loading