diff --git a/go/vt/vttablet/tabletserver/state_manager.go b/go/vt/vttablet/tabletserver/state_manager.go index 341bc12d1fd..36fb4aea22c 100644 --- a/go/vt/vttablet/tabletserver/state_manager.go +++ b/go/vt/vttablet/tabletserver/state_manager.go @@ -38,10 +38,14 @@ type servingState int64 const ( // StateNotConnected is the state where tabletserver is not - // connected to an underlying mysql instance. + // connected to an underlying mysql instance. In this state we close + // the query engine since MySQL is probably unavailable. StateNotConnected = servingState(iota) // StateNotServing is the state where tabletserver is connected // to an underlying mysql instance, but is not serving queries. + // We do not close the query engine, so that the pool is not closed. We keep + // the query engine open but prevent queries from running by blocking them + // in StartRequest. StateNotServing // StateServing is where queries are allowed. StateServing @@ -325,11 +329,25 @@ func (sm *stateManager) CheckMySQL() { } defer sm.transitioning.Release() + // This is required to prevent new queries from running in StartRequest + // unless they are part of a running transaction. + sm.setWantState(StateNotConnected) sm.closeAll() + + // Now that we reached the NotConnected state, we want to go back to the + // Serving state. The retry will only succeed once MySQL is reachable again. + // Until then, EnsureConnectionAndDB will error out. + sm.setWantState(StateServing) sm.retryTransition(fmt.Sprintf("Cannot connect to MySQL, shutting down query service: %v", err)) }() } +func (sm *stateManager) setWantState(stateWanted servingState) { + sm.mu.Lock() + defer sm.mu.Unlock() + sm.wantState = stateWanted +} + // StopService shuts down sm. If the shutdown doesn't complete // within timeBombDuration, it crashes the process.
func (sm *stateManager) StopService() { diff --git a/go/vt/vttablet/tabletserver/state_manager_test.go b/go/vt/vttablet/tabletserver/state_manager_test.go index 4953a3affe4..ebece7f00c6 100644 --- a/go/vt/vttablet/tabletserver/state_manager_test.go +++ b/go/vt/vttablet/tabletserver/state_manager_test.go @@ -457,9 +457,16 @@ func TestStateManagerCheckMySQL(t *testing.T) { err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "") require.NoError(t, err) + sm.te = &delayedTxEngine{} sm.qe.(*testQueryEngine).failMySQL = true order.Set(0) sm.CheckMySQL() + // We know CheckMySQL will take at least 50 milliseconds since txEngine.Close has a sleep in the test code. + time.Sleep(10 * time.Millisecond) + // This asserts that CheckMySQL is running. + assert.EqualValues(t, 0, sm.checkMySQLThrottler.Size()) + // While CheckMySQL is running, we should not be accepting any new requests which aren't transactional. + assert.False(t, sm.IsServing()) // Rechecking immediately should be a no-op: sm.CheckMySQL() @@ -491,6 +498,7 @@ func TestStateManagerCheckMySQL(t *testing.T) { time.Sleep(10 * time.Millisecond) } + assert.True(t, sm.IsServing()) assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.Target().TabletType) assert.Equal(t, StateServing, sm.State()) }