diff --git a/go/mysql/conn.go b/go/mysql/conn.go index 9f0c93a151a..9ee244b6dd4 100644 --- a/go/mysql/conn.go +++ b/go/mysql/conn.go @@ -761,13 +761,7 @@ func (c *Conn) handleNextCommand(handler Handler) error { data, err := c.readEphemeralPacket() if err != nil { // Don't log EOF errors. They cause too much spam. - // Note the EOF detection is not 100% - // guaranteed, in the case where the client - // connection is already closed before we call - // 'readEphemeralPacket'. This is a corner - // case though, and very unlikely to happen, - // and the only downside is we log a bit more then. - if err != io.EOF { + if err != io.EOF && !strings.Contains(err.Error(), "use of closed network connection") { log.Errorf("Error reading packet from %s: %v", c, err) } return err diff --git a/go/vt/dbconfigs/dbconfigs.go b/go/vt/dbconfigs/dbconfigs.go index fe13a8ee782..98dba288d78 100644 --- a/go/vt/dbconfigs/dbconfigs.go +++ b/go/vt/dbconfigs/dbconfigs.go @@ -27,6 +27,8 @@ import ( "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/vt/log" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/yaml2" ) @@ -190,6 +192,10 @@ func (c Connector) Connect(ctx context.Context) (*mysql.Conn, error) { // MysqlParams returns the connections params func (c Connector) MysqlParams() (*mysql.ConnParams, error) { + if c.connParams == nil { + // This is only possible during tests. + return nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, "parameters are empty") + } params, err := withCredentials(c.connParams) if err != nil { return nil, err diff --git a/go/vt/vttablet/endtoend/vstreamer_test.go b/go/vt/vttablet/endtoend/vstreamer_test.go index 67e7aac05bf..76d98f5b398 100644 --- a/go/vt/vttablet/endtoend/vstreamer_test.go +++ b/go/vt/vttablet/endtoend/vstreamer_test.go @@ -409,6 +409,7 @@ func TestSchemaVersioningLongDDL(t *testing.T) { } func runCases(ctx context.Context, t *testing.T, tests []test, eventCh chan []*binlogdatapb.VEvent) { + t.Helper() client := framework.NewClient() for _, test := range tests { diff --git a/go/vt/vttablet/heartbeat/reader.go b/go/vt/vttablet/heartbeat/reader.go index e7a492a30a8..e6b4afcb096 100644 --- a/go/vt/vttablet/heartbeat/reader.go +++ b/go/vt/vttablet/heartbeat/reader.go @@ -89,11 +89,8 @@ func NewReader(env tabletenv.Env) *Reader { } } -// Init does last minute initialization of db settings, such as keyspaceShard. -func (r *Reader) Init(target querypb.Target) { - if !r.enabled { - return - } +// InitDBConfig initializes the target name for the Reader. +func (r *Reader) InitDBConfig(target querypb.Target) { r.keyspaceShard = fmt.Sprintf("%s:%s", target.Keyspace, target.Shard) } diff --git a/go/vt/vttablet/heartbeat/writer.go b/go/vt/vttablet/heartbeat/writer.go index 8363d099650..0c212133ff9 100644 --- a/go/vt/vttablet/heartbeat/writer.go +++ b/go/vt/vttablet/heartbeat/writer.go @@ -21,14 +21,12 @@ import ( "sync" "time" - "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/withddl" "golang.org/x/net/context" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/timer" - "vitess.io/vitess/go/vt/dbconfigs" - "vitess.io/vitess/go/vt/dbconnpool" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/sqlparser" @@ -46,10 +44,14 @@ const ( tabletUid INT UNSIGNED NOT NULL, ts BIGINT UNSIGNED NOT NULL ) engine=InnoDB` - sqlInsertInitialRow = "INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%a, %a, %a) ON DUPLICATE KEY UPDATE ts=VALUES(ts)" - sqlUpdateHeartbeat = "UPDATE %s.heartbeat SET ts=%a, tabletUid=%a WHERE keyspaceShard=%a" + sqlUpsertHeartbeat = "INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%a, %a, %a) ON DUPLICATE KEY UPDATE ts=VALUES(ts), tabletUid=VALUES(tabletUid)" ) +var withDDL = withddl.New([]string{ + fmt.Sprintf(sqlCreateSidecarDB, "_vt"), + fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), +}) + // Writer runs on master tablets and writes heartbeats to the _vt.heartbeat // table at a regular interval, defined by heartbeat_interval. type Writer struct { @@ -90,25 +92,9 @@ func NewWriter(env tabletenv.Env, alias topodatapb.TabletAlias) *Writer { } } -// Init runs at tablet startup and last minute initialization of db settings, and -// creates the necessary tables for heartbeat. -func (w *Writer) Init(target querypb.Target) error { - if !w.enabled { - return nil - } - w.mu.Lock() - defer w.mu.Unlock() - log.Info("Initializing heartbeat table.") +// InitDBConfig initializes the target name for the Writer. +func (w *Writer) InitDBConfig(target querypb.Target) { w.keyspaceShard = fmt.Sprintf("%s:%s", target.Keyspace, target.Shard) - - if target.TabletType == topodatapb.TabletType_MASTER { - err := w.initializeTables(w.env.Config().DB.AppWithDB()) - if err != nil { - w.recordError(err) - return err - } - } - return nil } // Open sets up the Writer's db connection and launches the ticker @@ -124,9 +110,10 @@ func (w *Writer) Open() { if w.isOpen { return } + log.Info("Beginning heartbeat writes") w.pool.Open(w.env.Config().DB.AppWithDB(), w.env.Config().DB.DbaWithDB(), w.env.Config().DB.AppDebugWithDB()) - w.ticks.Start(func() { w.writeHeartbeat() }) + w.ticks.Start(w.writeHeartbeat) w.isOpen = true } @@ -147,36 +134,6 @@ func (w *Writer) Close() { w.isOpen = false } -// initializeTables attempts to create the heartbeat tables and record an -// initial row. The row is created only on master and is replicated to all -// other servers. -func (w *Writer) initializeTables(cp dbconfigs.Connector) error { - conn, err := dbconnpool.NewDBConnection(context.TODO(), cp) - if err != nil { - return vterrors.Wrap(err, "Failed to create connection for heartbeat") - } - defer conn.Close() - statements := []string{ - fmt.Sprintf(sqlCreateSidecarDB, "_vt"), - fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), - } - for _, s := range statements { - if _, err := conn.ExecuteFetch(s, 0, false); err != nil { - return vterrors.Wrap(err, "Failed to execute heartbeat init query") - } - } - insert, err := w.bindHeartbeatVars(sqlInsertInitialRow) - if err != nil { - return vterrors.Wrap(err, "Failed to bindHeartbeatVars initial heartbeat insert") - } - _, err = conn.ExecuteFetch(insert, 0, false) - if err != nil { - return vterrors.Wrap(err, "Failed to execute initial heartbeat insert") - } - writes.Add(1) - return nil -} - // bindHeartbeatVars takes a heartbeat write (insert or update) and // adds the necessary fields to the query as bind vars. This is done // to protect ourselves against a badly formed keyspace or shard name. @@ -196,29 +153,27 @@ func (w *Writer) bindHeartbeatVars(query string) (string, error) { // writeHeartbeat updates the heartbeat row for this tablet with the current time in nanoseconds. func (w *Writer) writeHeartbeat() { - defer w.env.LogError() - ctx, cancel := context.WithDeadline(context.Background(), w.now().Add(w.interval)) - defer cancel() - update, err := w.bindHeartbeatVars(sqlUpdateHeartbeat) - if err != nil { - w.recordError(err) - return - } - err = w.exec(ctx, update) - if err != nil { + if err := w.write(); err != nil { w.recordError(err) return } writes.Add(1) } -func (w *Writer) exec(ctx context.Context, query string) error { +func (w *Writer) write() error { + defer w.env.LogError() + ctx, cancel := context.WithDeadline(context.Background(), w.now().Add(w.interval)) + defer cancel() + upsert, err := w.bindHeartbeatVars(sqlUpsertHeartbeat) + if err != nil { + return err + } conn, err := w.pool.Get(ctx) if err != nil { return err } defer conn.Recycle() - _, err = conn.Exec(ctx, query, 0, false) + _, err = withDDL.Exec(ctx, upsert, conn.Exec) if err != nil { return err } diff --git a/go/vt/vttablet/heartbeat/writer_test.go b/go/vt/vttablet/heartbeat/writer_test.go index d0d8303d467..0d80c676236 100644 --- a/go/vt/vttablet/heartbeat/writer_test.go +++ b/go/vt/vttablet/heartbeat/writer_test.go @@ -21,6 +21,9 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "gotest.tools/assert" + "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/dbconfigs" @@ -35,10 +38,6 @@ var ( } ) -// TestCreateSchema tests that our initial INSERT uses -// the proper arguments. It also sanity checks the other init -// queries for completeness, and verifies that we return any -// failure that is encountered. func TestCreateSchema(t *testing.T) { db := fakesqldb.New(t) defer db.Close() @@ -46,44 +45,39 @@ func TestCreateSchema(t *testing.T) { defer tw.Close() writes.Reset() - db.AddQuery(fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), &sqltypes.Result{}) - db.AddQuery(fmt.Sprintf("INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%d, %d, '%s') ON DUPLICATE KEY UPDATE ts=VALUES(ts)", "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard), &sqltypes.Result{}) - if err := tw.initializeTables(db.ConnParams()); err == nil { - t.Fatal("initializeTables() should not have succeeded") + db.OrderMatters() + upsert := fmt.Sprintf("INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%d, %d, '%s') ON DUPLICATE KEY UPDATE ts=VALUES(ts), tabletUid=VALUES(tabletUid)", + "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard) + failInsert := fakesqldb.ExpectedExecuteFetch{ + Query: upsert, + Error: mysql.NewSQLError(mysql.ERBadDb, "", "bad db error"), } + db.AddExpectedExecuteFetch(failInsert) + db.AddExpectedQuery(fmt.Sprintf(sqlCreateSidecarDB, "_vt"), nil) + db.AddExpectedQuery(fmt.Sprintf(sqlCreateHeartbeatTable, "_vt"), nil) + db.AddExpectedQuery(upsert, nil) - db.AddQuery(fmt.Sprintf(sqlCreateSidecarDB, "_vt"), &sqltypes.Result{}) - if err := tw.initializeTables(db.ConnParams()); err != nil { - t.Fatalf("Should not be in error: %v", err) - } - - if got, want := writes.Get(), int64(1); got != want { - t.Fatalf("wrong writes count: got = %v, want = %v", got, want) - } + err := tw.write() + require.NoError(t, err) } -// TestWriteHearbeat ensures the proper arguments for the UPDATE query -// and writes get recorded in counters. func TestWriteHeartbeat(t *testing.T) { db := fakesqldb.New(t) defer db.Close() tw := newTestWriter(db, mockNowFunc) - db.AddQuery(fmt.Sprintf("UPDATE %s.heartbeat SET ts=%d, tabletUid=%d WHERE keyspaceShard='%s'", "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard), &sqltypes.Result{}) + upsert := fmt.Sprintf("INSERT INTO %s.heartbeat (ts, tabletUid, keyspaceShard) VALUES (%d, %d, '%s') ON DUPLICATE KEY UPDATE ts=VALUES(ts), tabletUid=VALUES(tabletUid)", + "_vt", now.UnixNano(), tw.tabletAlias.Uid, tw.keyspaceShard) + db.AddQuery(upsert, &sqltypes.Result{}) writes.Reset() writeErrors.Reset() tw.writeHeartbeat() - if got, want := writes.Get(), int64(1); got != want { - t.Fatalf("wrong writes count: got = %v; want = %v", got, want) - } - if got, want := writeErrors.Get(), int64(0); got != want { - t.Fatalf("wrong write errors count: got = %v; want = %v", got, want) - } + assert.Equal(t, int64(1), writes.Get()) + assert.Equal(t, int64(0), writeErrors.Get()) } -// TestWriteHeartbeatError ensures that we properly account for write errors. func TestWriteHeartbeatError(t *testing.T) { db := fakesqldb.New(t) defer db.Close() @@ -94,12 +88,8 @@ func TestWriteHeartbeatError(t *testing.T) { writeErrors.Reset() tw.writeHeartbeat() - if got, want := writes.Get(), int64(0); got != want { - t.Fatalf("wrong writes count: got = %v; want = %v", got, want) - } - if got, want := writeErrors.Get(), int64(1); got != want { - t.Fatalf("wrong write errors count: got = %v; want = %v", got, want) - } + assert.Equal(t, int64(0), writes.Get()) + assert.Equal(t, int64(1), writeErrors.Get()) } func newTestWriter(db *fakesqldb.DB, nowFunc func() time.Time) *Writer { diff --git a/go/vt/vttablet/tabletmanager/vreplication/external_connector.go b/go/vt/vttablet/tabletmanager/vreplication/external_connector.go index 9f1b2121c6a..e5c49209418 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/external_connector.go +++ b/go/vt/vttablet/tabletmanager/vreplication/external_connector.go @@ -88,14 +88,15 @@ func (ec *externalConnector) Get(name string) (*mysqlConnector, error) { c := &mysqlConnector{} c.env = tabletenv.NewEnv(config, name) c.se = schema.NewEngine(c.env) - c.vstreamer = vstreamer.NewEngine(c.env, nil, c.se) + c.vstreamer = vstreamer.NewEngine(c.env, nil, c.se, "") + c.vstreamer.InitDBConfig("") c.se.InitDBConfig(c.env.Config().DB.DbaWithDB()) // Open if err := c.se.Open(); err != nil { return nil, vterrors.Wrapf(err, "external mysqlConnector: %v", name) } - c.vstreamer.Open("", "") + c.vstreamer.Open() // Register ec.connectors[name] = c diff --git a/go/vt/vttablet/tabletmanager/vreplication/framework_test.go b/go/vt/vttablet/tabletmanager/vreplication/framework_test.go index f30bac4223f..9f18e8ac656 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/framework_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/framework_test.go @@ -89,8 +89,9 @@ func TestMain(m *testing.M) { // engines cannot be initialized in testenv because it introduces // circular dependencies. - streamerEngine = vstreamer.NewEngine(env.TabletEnv, env.SrvTopo, env.SchemaEngine) - streamerEngine.Open(env.KeyspaceName, env.Cells[0]) + streamerEngine = vstreamer.NewEngine(env.TabletEnv, env.SrvTopo, env.SchemaEngine, env.Cells[0]) + streamerEngine.InitDBConfig(env.KeyspaceName) + streamerEngine.Open() defer streamerEngine.Close() if err := env.Mysqld.ExecuteSuperQuery(context.Background(), fmt.Sprintf("create database %s", vrepldb)); err != nil { diff --git a/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go b/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go index 0ccc4fbd011..f7f79f5c8d9 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go @@ -1901,9 +1901,7 @@ func TestRestartOnVStreamEnd(t *testing.T) { expectDBClientQueries(t, []string{ "/update _vt.vreplication set message='vstream ended'", }) - if err := streamerEngine.Open(env.KeyspaceName, env.ShardName); err != nil { - t.Fatal(err) - } + streamerEngine.Open() execStatements(t, []string{ "insert into t1 values(2, 'aaa')", diff --git a/go/vt/vttablet/tabletserver/bench_test.go b/go/vt/vttablet/tabletserver/bench_test.go index c757454db45..43d3faea4df 100644 --- a/go/vt/vttablet/tabletserver/bench_test.go +++ b/go/vt/vttablet/tabletserver/bench_test.go @@ -27,8 +27,6 @@ import ( querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" - "vitess.io/vitess/go/vt/topo/memorytopo" - "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" ) // Benchmark run on 6/27/17, with optimized byte-level operations @@ -57,8 +55,10 @@ func init() { } func BenchmarkExecuteVarBinary(b *testing.B) { - db := setUpTabletServerTest(nil) + db, tsv := setupTabletServerTest(nil) defer db.Close() + defer tsv.StopService() + // sql that will be executed in this test bv := map[string]*querypb.BindVariable{ "vtg1": sqltypes.Int64BindVariable(1), @@ -67,14 +67,7 @@ func BenchmarkExecuteVarBinary(b *testing.B) { bv[fmt.Sprintf("vtg%d", i)] = sqltypes.BytesBindVariable(benchVarValue) } - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, newDBConfigs(db)); err != nil { - panic(err) - } - defer tsv.StopService() - db.AllowAll = true for i := 0; i < b.N; i++ { if _, err := tsv.Execute(context.Background(), &target, benchQuery, bv, 0, 0, nil); err != nil { @@ -84,8 +77,10 @@ func BenchmarkExecuteVarBinary(b *testing.B) { } func BenchmarkExecuteExpression(b *testing.B) { - db := setUpTabletServerTest(nil) + db, tsv := setupTabletServerTest(nil) defer db.Close() + defer tsv.StopService() + // sql that will be executed in this test bv := map[string]*querypb.BindVariable{ "vtg1": sqltypes.Int64BindVariable(1), @@ -97,14 +92,7 @@ func BenchmarkExecuteExpression(b *testing.B) { } } - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, newDBConfigs(db)); err != nil { - panic(err) - } - defer tsv.StopService() - db.AllowAll = true for i := 0; i < b.N; i++ { if _, err := tsv.Execute(context.Background(), &target, benchQuery, bv, 0, 0, nil); err != nil { diff --git a/go/vt/vttablet/tabletserver/messager/engine.go b/go/vt/vttablet/tabletserver/messager/engine.go index 3a84acea713..470622ed07b 100644 --- a/go/vt/vttablet/tabletserver/messager/engine.go +++ b/go/vt/vttablet/tabletserver/messager/engine.go @@ -72,14 +72,17 @@ func NewEngine(tsv TabletService, se *schema.Engine, vs VStreamer) *Engine { } // Open starts the Engine service. -func (me *Engine) Open() error { +func (me *Engine) Open() { + me.mu.Lock() if me.isOpen { - return nil + me.mu.Unlock() + return } - + me.mu.Unlock() + // Unlock before invoking RegisterNotifier because it + // obtains the same lock. me.se.RegisterNotifier("messages", me.schemaChanged) me.isOpen = true - return nil } // Close closes the Engine service. diff --git a/go/vt/vttablet/tabletserver/query_engine.go b/go/vt/vttablet/tabletserver/query_engine.go index 0c1842961ea..1ce97b70b87 100644 --- a/go/vt/vttablet/tabletserver/query_engine.go +++ b/go/vt/vttablet/tabletserver/query_engine.go @@ -28,7 +28,6 @@ import ( "vitess.io/vitess/go/acl" "vitess.io/vitess/go/cache" - "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/stats" "vitess.io/vitess/go/streamlog" "vitess.io/vitess/go/sync2" @@ -113,8 +112,9 @@ func (ep *TabletPlan) buildAuthorized() { // Close: There should be no more pending queries when this // function is called. type QueryEngine struct { - env tabletenv.Env - se *schema.Engine + isOpen bool + env tabletenv.Env + se *schema.Engine // mu protects the following fields. mu sync.RWMutex @@ -236,6 +236,9 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine { // Open must be called before sending requests to QueryEngine. func (qe *QueryEngine) Open() error { + if qe.isOpen { + return nil + } qe.conns.Open(qe.env.Config().DB.AppWithDB(), qe.env.Config().DB.DbaWithDB(), qe.env.Config().DB.AppDebugWithDB()) conn, err := qe.conns.Get(tabletenv.LocalContext()) @@ -255,19 +258,30 @@ func (qe *QueryEngine) Open() error { qe.streamConns.Open(qe.env.Config().DB.AppWithDB(), qe.env.Config().DB.DbaWithDB(), qe.env.Config().DB.AppDebugWithDB()) qe.se.RegisterNotifier("qe", qe.schemaChanged) + qe.isOpen = true return nil } +// StopServing kills all streaming queries. +// Other queries are handled by the tsv.requests Waitgroup. +func (qe *QueryEngine) StopServing() { + qe.streamQList.TerminateAll() +} + // Close must be called to shut down QueryEngine. // You must ensure that no more queries will be sent // before calling Close. func (qe *QueryEngine) Close() { + if !qe.isOpen { + return + } // Close in reverse order of Open. qe.se.UnregisterNotifier("qe") qe.plans.Clear() qe.tables = make(map[string]*schema.Table) qe.streamConns.Close() qe.conns.Close() + qe.isOpen = false } // GetPlan returns the TabletPlan that for the query. Plans are cached in a cache.LRUCache. @@ -358,18 +372,15 @@ func (qe *QueryEngine) ClearQueryPlanCache() { qe.plans.Clear() } -// IsMySQLReachable returns true if we can connect to MySQL. -func (qe *QueryEngine) IsMySQLReachable() bool { - conn, err := dbconnpool.NewDBConnection(context.TODO(), qe.env.Config().DB.DbaWithDB()) +// IsMySQLReachable returns an error if it cannot connect to MySQL. +// This can be called before opening the QueryEngine. +func (qe *QueryEngine) IsMySQLReachable() error { + conn, err := dbconnpool.NewDBConnection(context.TODO(), qe.env.Config().DB.AppWithDB()) if err != nil { - if mysql.IsConnErr(err) { - return false - } - log.Warningf("checking MySQL, unexpected error: %v", err) - return true + return err } conn.Close() - return true + return nil } func (qe *QueryEngine) schemaChanged(tables map[string]*schema.Table, created, altered, dropped []string) { diff --git a/go/vt/vttablet/tabletserver/query_executor_test.go b/go/vt/vttablet/tabletserver/query_executor_test.go index 5068eddfea2..922cf7d2762 100644 --- a/go/vt/vttablet/tabletserver/query_executor_test.go +++ b/go/vt/vttablet/tabletserver/query_executor_test.go @@ -262,11 +262,12 @@ func TestQueryExecutorPlans(t *testing.T) { assert.Equal(t, tcase.logWant, qre.logStats.RewrittenSQL(), tcase.input) // Test inside a transaction. - txid, alias, err := tsv.Begin(ctx, &tsv.target, nil) + target := tsv.sm.Target() + txid, alias, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) require.NotNil(t, alias, "alias should not be nil") assert.Equal(t, tsv.alias, *alias, "Wrong alias returned by Begin") - defer tsv.Commit(ctx, &tsv.target, txid) + defer tsv.Commit(ctx, &target, txid) qre = newTestQueryExecutor(ctx, tsv, tcase.input, txid) got, err = qre.Execute() @@ -328,11 +329,12 @@ func TestQueryExecutorSelectImpossible(t *testing.T) { assert.Equal(t, tcase.resultWant, got, tcase.input) assert.Equal(t, tcase.planWant, qre.logStats.PlanType, tcase.input) assert.Equal(t, tcase.logWant, qre.logStats.RewrittenSQL(), tcase.input) - txid, alias, err := tsv.Begin(ctx, &tsv.target, nil) + target := tsv.sm.Target() + txid, alias, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) require.NotNil(t, alias, "alias should not be nil") assert.Equal(t, tsv.alias, *alias, "Wrong tablet alias from Begin") - defer tsv.Commit(ctx, &tsv.target, txid) + defer tsv.Commit(ctx, &target, txid) qre = newTestQueryExecutor(ctx, tsv, tcase.input, txid) got, err = qre.Execute() @@ -435,11 +437,12 @@ func TestQueryExecutorLimitFailure(t *testing.T) { assert.Equal(t, tcase.logWant, qre.logStats.RewrittenSQL(), tcase.input) // Test inside a transaction. - txid, alias, err := tsv.Begin(ctx, &tsv.target, nil) + target := tsv.sm.Target() + txid, alias, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) require.NotNil(t, alias, "alias should not be nil") assert.Equal(t, tsv.alias, *alias, "Wrong tablet alias from Begin") - defer tsv.Commit(ctx, &tsv.target, txid) + defer tsv.Commit(ctx, &target, txid) qre = newTestQueryExecutor(ctx, tsv, tcase.input, txid) _, err = qre.Execute() @@ -1106,12 +1109,16 @@ func newTestTabletServer(ctx context.Context, flags executorFlags, db *fakesqldb tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) dbconfigs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - tsv.StartService(target, dbconfigs) + err := tsv.StartService(target, dbconfigs) + if err != nil { + panic(err) + } return tsv } func newTransaction(tsv *TabletServer, options *querypb.ExecuteOptions) int64 { - transactionID, _, err := tsv.Begin(context.Background(), &tsv.target, options) + target := tsv.sm.Target() + transactionID, _, err := tsv.Begin(context.Background(), &target, options) if err != nil { panic(vterrors.Wrap(err, "failed to start a transaction")) } @@ -1158,7 +1165,6 @@ func getTestTableFields() []*querypb.Field { func getQueryExecutorSupportedQueries(testTableHasMultipleUniqueKeys bool) map[string]*sqltypes.Result { return map[string]*sqltypes.Result{ // queries for twopc - sqlTurnoffBinlog: {}, fmt.Sprintf(sqlCreateSidecarDB, "_vt"): {}, fmt.Sprintf(sqlDropLegacy1, "_vt"): {}, fmt.Sprintf(sqlDropLegacy2, "_vt"): {}, diff --git a/go/vt/vttablet/tabletserver/schema/engine.go b/go/vt/vttablet/tabletserver/schema/engine.go index 5a258dd3c63..d8de480677c 100644 --- a/go/vt/vttablet/tabletserver/schema/engine.go +++ b/go/vt/vttablet/tabletserver/schema/engine.go @@ -261,6 +261,8 @@ func (se *Engine) reload(ctx context.Context) error { tableName := row[0].ToString() curTables[tableName] = true createTime, _ := evalengine.ToInt64(row[2]) + // TODO(sougou); find a better way detect changed tables. This method + // seems unreliable. The endtoend test flags all tables as changed. if _, ok := se.tables[tableName]; ok && createTime < se.lastChange { continue } diff --git a/go/vt/vttablet/tabletserver/state_manager.go b/go/vt/vttablet/tabletserver/state_manager.go new file mode 100644 index 00000000000..832df33668d --- /dev/null +++ b/go/vt/vttablet/tabletserver/state_manager.go @@ -0,0 +1,544 @@ +/* +Copyright 2020 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tabletserver + +import ( + "context" + "fmt" + "sync" + "time" + + "vitess.io/vitess/go/history" + "vitess.io/vitess/go/sync2" + "vitess.io/vitess/go/vt/log" + querypb "vitess.io/vitess/go/vt/proto/query" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" +) + +type servingState int64 + +const ( + // StateNotConnected is the state where tabletserver is not + // connected to an underlying mysql instance. + StateNotConnected = servingState(iota) + // StateNotServing is the state where tabletserver is connected + // to an underlying mysql instance, but is not serving queries. + StateNotServing + // StateServing is where queries are allowed. + StateServing +) + +// transitionRetryInterval is for tests. +var transitionRetryInterval = 1 * time.Second + +// stateName names every state. The number of elements must +// match the number of states. Names can overlap. +var stateName = []string{ + "NOT_SERVING", + "NOT_SERVING", + "SERVING", +} + +// stateDetail matches every state and optionally more information about the reason +// why the state is serving / not serving. +var stateDetail = []string{ + "Not Connected", + "Not Serving", + "", +} + +// stateManager manages state transition for all the TabletServer +// subcomponents. +type stateManager struct { + // transitioning is a semaphore that must to be obtained + // before attempting a state transition. To prevent deadlocks, + // this must be acquired before the mu lock. We use a semaphore + // because we need TryAcquire, which is not supported by sync.Mutex. + // If an acquire is successful, we must either Release explicitly + // or invoke execTransition, which will release once it's done. + transitioning *sync2.Semaphore + + // mu should be held to access the group of variables under it. + // It is required in spite of the transitioning semaphore. + // This is because other goroutines will still want + // read the values while a transition is in progress. + // + // If a transition fails, we set retrying to true and launch + // retryTransition which loops until the state converges. + mu sync.Mutex + wantState servingState + wantTabletType topodatapb.TabletType + state servingState + target querypb.Target + retrying bool + // TODO(sougou): deprecate alsoAllow + alsoAllow []topodatapb.TabletType + + requests sync.WaitGroup + lameduck sync2.AtomicInt32 + + // Open must be done in forward order. + // Close must be done in reverse order. + // All Close functions must be called before Open. + se schemaEngine + hw subComponent + hr subComponent + vstreamer subComponent + tracker subComponent + watcher subComponent + qe queryEngine + txThrottler txThrottler + te txEngine + messager subComponent + + // checkMySQLThrottler ensures that CheckMysql + // doesn't get spammed. + checkMySQLThrottler *sync2.Semaphore + history *history.History + timebombDuration time.Duration +} + +type schemaEngine interface { + Open() error + MakeNonMaster() + Close() +} + +type queryEngine interface { + Open() error + IsMySQLReachable() error + StopServing() + Close() +} + +type txEngine interface { + AcceptReadWrite() error + AcceptReadOnly() error + Close() +} + +type subComponent interface { + Open() + Close() +} + +type txThrottler interface { + Open() error + Close() +} + +// SetServingType changes the state to the specified settings. +// If a transition is in progress, it waits and then executes the +// new request. If the transition fails, it returns an error, and +// launches retryTransition to ensure that the request will eventually +// be honored. +// If sm is already in the requested state, it returns stateChanged as +// false. +func (sm *stateManager) SetServingType(tabletType topodatapb.TabletType, state servingState, alsoAllow []topodatapb.TabletType) (stateChanged bool, err error) { + defer sm.ExitLameduck() + + if tabletType == topodatapb.TabletType_RESTORE { + // TODO(sougou): remove this code once tm can give us more accurate state requests. + state = StateNotConnected + } + + log.Infof("Starting transition to %v %v", tabletType, stateName[state]) + if sm.mustTransition(tabletType, state, alsoAllow) { + return true, sm.execTransition(tabletType, state) + } + return false, nil +} + +// mustTransition returns true if the requested state does not match the current +// state. If so, it acquires the semaphore and returns true. If a transition is +// already in progress, it waits. If the desired state is already reached, it +// returns false without acquiring the semaphore. +func (sm *stateManager) mustTransition(tabletType topodatapb.TabletType, state servingState, alsoAllow []topodatapb.TabletType) bool { + sm.transitioning.Acquire() + sm.mu.Lock() + defer sm.mu.Unlock() + + sm.wantTabletType = tabletType + sm.wantState = state + sm.alsoAllow = alsoAllow + if sm.target.TabletType == tabletType && sm.state == state { + sm.transitioning.Release() + return false + } + return true +} + +func (sm *stateManager) execTransition(tabletType topodatapb.TabletType, state servingState) error { + defer sm.transitioning.Release() + + var err error + switch state { + case StateServing: + if tabletType == topodatapb.TabletType_MASTER { + err = sm.serveMaster() + } else { + err = sm.serveNonMaster(tabletType) + } + case StateNotServing: + if tabletType == topodatapb.TabletType_MASTER { + err = sm.unserveMaster() + } else { + err = sm.unserveNonMaster(tabletType) + } + case StateNotConnected: + sm.closeAll() + } + if err != nil { + sm.retryTransition(fmt.Sprintf("Error transitioning to the desired state: %v, %v, will keep retrying: %v", tabletType, stateName[state], err)) + } + return err +} + +func (sm *stateManager) retryTransition(message string) { + sm.mu.Lock() + defer sm.mu.Unlock() + if sm.retrying { + return + } + sm.retrying = true + + log.Error(message) + go func() { + for { + time.Sleep(transitionRetryInterval) + if sm.recheckState() { + return + } + } + }() +} + +func (sm *stateManager) recheckState() bool { + if !sm.transitioning.TryAcquire() { + return false + } + sm.mu.Lock() + defer sm.mu.Unlock() + + if sm.wantState == sm.state && sm.wantTabletType == sm.target.TabletType { + sm.retrying = false + sm.transitioning.Release() + return true + } + go sm.execTransition(sm.wantTabletType, sm.wantState) + return false +} + +// CheckMySQL verifies that we can connect to mysql. +// If it fails, then we shutdown the service and initiate +// the retry loop. +func (sm *stateManager) CheckMySQL() { + if !sm.checkMySQLThrottler.TryAcquire() { + return + } + go func() { + defer func() { + time.Sleep(1 * time.Second) + sm.checkMySQLThrottler.Release() + }() + + err := sm.qe.IsMySQLReachable() + if err == nil { + return + } + + if !sm.transitioning.TryAcquire() { + // If we're already transitioning, don't interfere. + return + } + defer sm.transitioning.Release() + + sm.closeAll() + sm.retryTransition(fmt.Sprintf("Cannot connect to MySQL, shutting down query service: %v", err)) + }() +} + +// StopService shuts down sm. If the shutdown doesn't complete +// within timeBombDuration, it crashes the process. +func (sm *stateManager) StopService() { + defer close(sm.setTimeBomb()) + sm.SetServingType(sm.Target().TabletType, StateNotConnected, nil) +} + +// StartRequest validates the current state and target and registers +// the request (a waitgroup) as started. Every StartRequest must be +// ended with an EndRequest. +func (sm *stateManager) StartRequest(ctx context.Context, target *querypb.Target, allowOnShutdown bool) (err error) { + sm.mu.Lock() + defer sm.mu.Unlock() + + if sm.state != StateServing { + return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "operation not allowed in state %s", stateName[sm.state]) + } + + shuttingDown := sm.wantState != StateServing + if shuttingDown && !allowOnShutdown { + // This specific error string needs to be returned for vtgate buffering to work. + return vterrors.New(vtrpcpb.Code_FAILED_PRECONDITION, "operation not allowed in state SHUTTING_DOWN") + } + + if target != nil { + switch { + case target.Keyspace != sm.target.Keyspace: + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid keyspace %v", target.Keyspace) + case target.Shard != sm.target.Shard: + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid shard %v", target.Shard) + case target.TabletType != sm.target.TabletType: + for _, otherType := range sm.alsoAllow { + if target.TabletType == otherType { + goto ok + } + } + return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "invalid tablet type: %v, want: %v or %v", target.TabletType, sm.target.TabletType, sm.alsoAllow) + } + } else { + if !tabletenv.IsLocalContext(ctx) { + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "No target") + } + } + +ok: + sm.requests.Add(1) + return nil +} + +// EndRequest unregisters the current request (a waitgroup) as done. +func (sm *stateManager) EndRequest() { + sm.requests.Done() +} + +// VerifyTarget allows requests to be executed even in non-serving state. +// Such requests will get terminated without wait on shutdown. +func (sm *stateManager) VerifyTarget(ctx context.Context, target *querypb.Target) error { + sm.mu.Lock() + defer sm.mu.Unlock() + if target != nil { + switch { + case target.Keyspace != sm.target.Keyspace: + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid keyspace %v", target.Keyspace) + case target.Shard != sm.target.Shard: + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid shard %v", target.Shard) + case target.TabletType != sm.target.TabletType: + for _, otherType := range sm.alsoAllow { + if target.TabletType == otherType { + return nil + } + } + return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "invalid tablet type: %v, want: %v or %v", target.TabletType, sm.target.TabletType, sm.alsoAllow) + } + } else { + if !tabletenv.IsLocalContext(ctx) { + return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "No target") + } + } + return nil +} + +func (sm *stateManager) serveMaster() error { + sm.watcher.Close() + sm.hr.Close() + + if err := sm.connect(); err != nil { + return err + } + + sm.hw.Open() + sm.tracker.Open() + if err := sm.te.AcceptReadWrite(); err != nil { + return err + } + sm.messager.Open() + sm.setState(topodatapb.TabletType_MASTER, StateServing) + return nil +} + +func (sm *stateManager) unserveMaster() error { + sm.unserveCommon() + + sm.watcher.Close() + sm.hr.Close() + + if err := sm.connect(); err != nil { + return err + } + + sm.hw.Open() + sm.tracker.Open() + sm.setState(topodatapb.TabletType_MASTER, StateNotServing) + return nil +} + +func (sm *stateManager) serveNonMaster(wantTabletType topodatapb.TabletType) error { + sm.messager.Close() + sm.tracker.Close() + sm.hw.Close() + sm.se.MakeNonMaster() + + if err := sm.connect(); err != nil { + return err + } + + if err := sm.te.AcceptReadOnly(); err != nil { + return err + } + sm.hr.Open() + sm.watcher.Open() + sm.setState(wantTabletType, StateServing) + return nil +} + +func (sm *stateManager) unserveNonMaster(wantTabletType topodatapb.TabletType) error { + sm.unserveCommon() + + sm.tracker.Close() + sm.hw.Close() + sm.se.MakeNonMaster() + + if err := sm.connect(); err != nil { + return err + } + + sm.hr.Open() + sm.watcher.Open() + sm.setState(wantTabletType, StateNotServing) + return nil +} + +func (sm *stateManager) connect() error { + if err := sm.qe.IsMySQLReachable(); err != nil { + return err + } + if err := sm.se.Open(); err != nil { + return err + } + sm.vstreamer.Open() + if err := sm.qe.Open(); err != nil { + return err + } + return sm.txThrottler.Open() +} + +func (sm *stateManager) unserveCommon() { + sm.messager.Close() + sm.te.Close() + sm.qe.StopServing() + sm.requests.Wait() +} + +func (sm *stateManager) closeAll() { + sm.unserveCommon() + sm.txThrottler.Close() + sm.qe.Close() + sm.watcher.Close() + sm.tracker.Close() + sm.vstreamer.Close() + sm.hr.Close() + sm.hw.Close() + sm.se.Close() + sm.setState(topodatapb.TabletType_UNKNOWN, StateNotConnected) +} + +func (sm *stateManager) setTimeBomb() chan struct{} { + done := make(chan struct{}) + go func() { + if sm.timebombDuration == 0 { + return + } + tmr := time.NewTimer(sm.timebombDuration) + defer tmr.Stop() + select { + case <-tmr.C: + log.Fatal("Shutdown took too long. Crashing") + case <-done: + } + }() + return done +} + +// setState changes the state and logs the event. +func (sm *stateManager) setState(tabletType topodatapb.TabletType, state servingState) { + sm.mu.Lock() + defer sm.mu.Unlock() + + if tabletType == topodatapb.TabletType_UNKNOWN { + tabletType = sm.wantTabletType + } + log.Infof("TabletServer transition: %v -> %v, %s -> %s", sm.target.TabletType, tabletType, stateInfo(sm.state), stateInfo(state)) + sm.target.TabletType = tabletType + sm.state = state + sm.history.Add(&historyRecord{ + Time: time.Now(), + ServingState: stateInfo(state), + TabletType: sm.target.TabletType.String(), + }) +} + +// EnterLameduck causes tabletserver to enter the lameduck state. This +// state causes health checks to fail, but the behavior of tabletserver +// otherwise remains the same. Any subsequent calls to SetServingType will +// cause the tabletserver to exit this mode. +func (sm *stateManager) EnterLameduck() { + sm.lameduck.Set(1) +} + +// ExitLameduck causes the tabletserver to exit the lameduck mode. +func (sm *stateManager) ExitLameduck() { + sm.lameduck.Set(0) +} + +// IsServing returns true if TabletServer is in SERVING state. +func (sm *stateManager) IsServing() bool { + return sm.StateByName() == "SERVING" +} + +func (sm *stateManager) State() servingState { + sm.mu.Lock() + defer sm.mu.Unlock() + return sm.state +} + +func (sm *stateManager) Target() querypb.Target { + sm.mu.Lock() + defer sm.mu.Unlock() + target := sm.target + return target +} + +// StateByName returns the name of the current TabletServer state. +func (sm *stateManager) StateByName() string { + if sm.lameduck.Get() != 0 { + return "NOT_SERVING" + } + return stateName[sm.State()] +} + +// stateInfo returns a string representation of the state and optional detail +// about the reason for the state transition +func stateInfo(state servingState) string { + if state == StateServing { + return "SERVING" + } + return fmt.Sprintf("%s (%s)", stateName[state], stateDetail[state]) +} diff --git a/go/vt/vttablet/tabletserver/state_manager_test.go b/go/vt/vttablet/tabletserver/state_manager_test.go new file mode 100644 index 00000000000..89ee61cbe71 --- /dev/null +++ b/go/vt/vttablet/tabletserver/state_manager_test.go @@ -0,0 +1,615 @@ +/* +Copyright 2020 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package tabletserver + +import ( + "errors" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "vitess.io/vitess/go/history" + "vitess.io/vitess/go/sync2" + querypb "vitess.io/vitess/go/vt/proto/query" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" + "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" +) + +func TestStateManagerStateByName(t *testing.T) { + states := []servingState{ + StateNotConnected, + StateNotServing, + StateServing, + } + // Don't reuse stateName. + names := []string{ + "NOT_SERVING", + "NOT_SERVING", + "SERVING", + } + sm := &stateManager{} + for i, state := range states { + sm.state = state + require.Equal(t, names[i], sm.StateByName(), "StateByName") + } + sm.EnterLameduck() + require.Equal(t, "NOT_SERVING", sm.StateByName(), "StateByName") +} + +func TestStateManagerServeMaster(t *testing.T) { + sm := newTestStateManager(t) + sm.EnterLameduck() + stateChanged, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + assert.Equal(t, int32(0), sm.lameduck.Get()) + + verifySubcomponent(t, 1, sm.watcher, testStateClosed) + verifySubcomponent(t, 2, sm.hr, testStateClosed) + + verifySubcomponent(t, 3, sm.se, testStateOpen) + verifySubcomponent(t, 4, sm.vstreamer, testStateOpen) + verifySubcomponent(t, 5, sm.qe, testStateOpen) + verifySubcomponent(t, 6, sm.txThrottler, testStateOpen) + verifySubcomponent(t, 7, sm.hw, testStateOpen) + verifySubcomponent(t, 8, sm.tracker, testStateOpen) + verifySubcomponent(t, 9, sm.te, testStateAcceptReadWrite) + verifySubcomponent(t, 10, sm.messager, testStateOpen) + + assert.False(t, sm.se.(*testSchemaEngine).nonMaster) + assert.True(t, sm.qe.(*testQueryEngine).isReachable) + assert.False(t, sm.qe.(*testQueryEngine).stopServing) + + assert.Equal(t, topodatapb.TabletType_MASTER, sm.target.TabletType) + assert.Equal(t, StateServing, sm.state) +} + +func TestStateManagerServeNonMaster(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_REPLICA, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + verifySubcomponent(t, 1, sm.messager, testStateClosed) + verifySubcomponent(t, 2, sm.tracker, testStateClosed) + verifySubcomponent(t, 3, sm.hw, testStateClosed) + assert.True(t, sm.se.(*testSchemaEngine).nonMaster) + + verifySubcomponent(t, 4, sm.se, testStateOpen) + verifySubcomponent(t, 5, sm.vstreamer, testStateOpen) + verifySubcomponent(t, 6, sm.qe, testStateOpen) + verifySubcomponent(t, 7, sm.txThrottler, testStateOpen) + verifySubcomponent(t, 8, sm.te, testStateAcceptReadOnly) + verifySubcomponent(t, 9, sm.hr, testStateOpen) + verifySubcomponent(t, 10, sm.watcher, testStateOpen) + + assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType) + assert.Equal(t, StateServing, sm.state) +} + +func TestStateManagerUnserveMaster(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateNotServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + verifySubcomponent(t, 1, sm.messager, testStateClosed) + verifySubcomponent(t, 2, sm.te, testStateClosed) + assert.True(t, sm.qe.(*testQueryEngine).stopServing) + + verifySubcomponent(t, 3, sm.watcher, testStateClosed) + verifySubcomponent(t, 4, sm.hr, testStateClosed) + + verifySubcomponent(t, 5, sm.se, testStateOpen) + verifySubcomponent(t, 6, sm.vstreamer, testStateOpen) + verifySubcomponent(t, 7, sm.qe, testStateOpen) + verifySubcomponent(t, 8, sm.txThrottler, testStateOpen) + + verifySubcomponent(t, 9, sm.hw, testStateOpen) + verifySubcomponent(t, 10, sm.tracker, testStateOpen) + + assert.Equal(t, topodatapb.TabletType_MASTER, sm.target.TabletType) + assert.Equal(t, StateNotServing, sm.state) +} + +func TestStateManagerUnserveNonmaster(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_RDONLY, StateNotServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + verifySubcomponent(t, 1, sm.messager, testStateClosed) + verifySubcomponent(t, 2, sm.te, testStateClosed) + assert.True(t, sm.qe.(*testQueryEngine).stopServing) + + verifySubcomponent(t, 3, sm.tracker, testStateClosed) + verifySubcomponent(t, 4, sm.hw, testStateClosed) + assert.True(t, sm.se.(*testSchemaEngine).nonMaster) + + verifySubcomponent(t, 5, sm.se, testStateOpen) + verifySubcomponent(t, 6, sm.vstreamer, testStateOpen) + verifySubcomponent(t, 7, sm.qe, testStateOpen) + verifySubcomponent(t, 8, sm.txThrottler, testStateOpen) + + verifySubcomponent(t, 9, sm.hr, testStateOpen) + verifySubcomponent(t, 10, sm.watcher, testStateOpen) + + assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType) + assert.Equal(t, StateNotServing, sm.state) +} + +func TestStateManagerClose(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_RDONLY, StateNotConnected, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + verifySubcomponent(t, 1, sm.messager, testStateClosed) + verifySubcomponent(t, 2, sm.te, testStateClosed) + assert.True(t, sm.qe.(*testQueryEngine).stopServing) + + verifySubcomponent(t, 3, sm.txThrottler, testStateClosed) + verifySubcomponent(t, 4, sm.qe, testStateClosed) + verifySubcomponent(t, 5, sm.watcher, testStateClosed) + verifySubcomponent(t, 6, sm.tracker, testStateClosed) + verifySubcomponent(t, 7, sm.vstreamer, testStateClosed) + verifySubcomponent(t, 8, sm.hr, testStateClosed) + verifySubcomponent(t, 9, sm.hw, testStateClosed) + verifySubcomponent(t, 10, sm.se, testStateClosed) + + assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType) + assert.Equal(t, StateNotConnected, sm.state) +} + +func TestStateManagerStopService(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_REPLICA, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType) + assert.Equal(t, StateServing, sm.state) + + sm.StopService() + assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType) + assert.Equal(t, StateNotConnected, sm.state) +} + +// testWatcher is used as a hook to invoke another transition +type testWatcher struct { + t *testing.T + sm *stateManager + wg sync.WaitGroup +} + +func (te *testWatcher) Open() { +} + +func (te *testWatcher) Close() { + te.wg.Add(1) + go func() { + defer te.wg.Done() + + stateChanged, err := te.sm.SetServingType(topodatapb.TabletType_RDONLY, StateNotServing, nil) + assert.NoError(te.t, err) + assert.True(te.t, stateChanged) + }() +} + +func TestStateManagerSetServingTypeRace(t *testing.T) { + sm := newTestStateManager(t) + te := &testWatcher{ + t: t, + sm: sm, + } + sm.watcher = te + stateChanged, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + // Ensure the next call waits and then succeeds. + te.wg.Wait() + + // End state should be the final desired state. + assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType) + assert.Equal(t, StateNotServing, sm.state) +} + +func TestStateManagerSetServingTypeNoChange(t *testing.T) { + sm := newTestStateManager(t) + stateChanged, err := sm.SetServingType(topodatapb.TabletType_REPLICA, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + stateChanged, err = sm.SetServingType(topodatapb.TabletType_REPLICA, StateServing, nil) + require.NoError(t, err) + assert.False(t, stateChanged) + + verifySubcomponent(t, 1, sm.messager, testStateClosed) + verifySubcomponent(t, 2, sm.tracker, testStateClosed) + verifySubcomponent(t, 3, sm.hw, testStateClosed) + assert.True(t, sm.se.(*testSchemaEngine).nonMaster) + + verifySubcomponent(t, 4, sm.se, testStateOpen) + verifySubcomponent(t, 5, sm.vstreamer, testStateOpen) + verifySubcomponent(t, 6, sm.qe, testStateOpen) + verifySubcomponent(t, 7, sm.txThrottler, testStateOpen) + verifySubcomponent(t, 8, sm.te, testStateAcceptReadOnly) + verifySubcomponent(t, 9, sm.hr, testStateOpen) + verifySubcomponent(t, 10, sm.watcher, testStateOpen) + + assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType) + assert.Equal(t, StateServing, sm.state) +} + +func TestStateManagerTransitionFailRetry(t *testing.T) { + defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval) + transitionRetryInterval = 10 * time.Millisecond + + sm := newTestStateManager(t) + sm.qe.(*testQueryEngine).failMySQL = true + + stateChanged, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateServing, nil) + require.Error(t, err) + assert.True(t, stateChanged) + + // Calling retryTransition while retrying should be a no-op. + sm.retryTransition("") + + // Steal the lock and wait long enough for the retry + // to fail, and then release it. The retry will have + // to keep retrying. + sm.transitioning.Acquire() + time.Sleep(30 * time.Millisecond) + sm.transitioning.Release() + + for { + sm.mu.Lock() + retrying := sm.retrying + sm.mu.Unlock() + if !retrying { + break + } + time.Sleep(10 * time.Millisecond) + } + + assert.Equal(t, topodatapb.TabletType_MASTER, sm.Target().TabletType) + assert.Equal(t, StateServing, sm.State()) +} + +func TestStateManagerRestoreType(t *testing.T) { + sm := newTestStateManager(t) + sm.EnterLameduck() + stateChanged, err := sm.SetServingType(topodatapb.TabletType_RESTORE, StateNotServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + assert.Equal(t, topodatapb.TabletType_RESTORE, sm.target.TabletType) + // RESTORE can only be in StateNotConnected. + assert.Equal(t, StateNotConnected, sm.state) +} + +func TestStateManagerCheckMySQL(t *testing.T) { + defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval) + transitionRetryInterval = 10 * time.Millisecond + + sm := newTestStateManager(t) + + stateChanged, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateServing, nil) + require.NoError(t, err) + assert.True(t, stateChanged) + + sm.qe.(*testQueryEngine).failMySQL = true + order.Set(0) + sm.CheckMySQL() + + // Rechecking immediately should be a no-op: + sm.CheckMySQL() + + // Wait for closeAll to get under way. + for { + if order.Get() >= 1 { + break + } + time.Sleep(10 * time.Millisecond) + } + + // Wait to get out of transitioning state. + for { + if !sm.isTransitioning() { + break + } + time.Sleep(10 * time.Millisecond) + } + + // Wait for retry to finish. + for { + sm.mu.Lock() + retrying := sm.retrying + sm.mu.Unlock() + if !retrying { + break + } + time.Sleep(10 * time.Millisecond) + } + + assert.Equal(t, topodatapb.TabletType_MASTER, sm.Target().TabletType) + assert.Equal(t, StateServing, sm.State()) +} + +func TestStateManagerValidations(t *testing.T) { + sm := newTestStateManager(t) + target := &querypb.Target{TabletType: topodatapb.TabletType_MASTER} + sm.target = *target + + err := sm.StartRequest(ctx, target, false) + assert.Contains(t, err.Error(), "operation not allowed") + + sm.state = StateServing + sm.wantState = StateNotServing + err = sm.StartRequest(ctx, target, false) + assert.Contains(t, err.Error(), "operation not allowed") + + err = sm.StartRequest(ctx, target, true) + assert.NoError(t, err) + + sm.wantState = StateServing + target.Keyspace = "a" + err = sm.StartRequest(ctx, target, false) + assert.Contains(t, err.Error(), "invalid keyspace") + err = sm.VerifyTarget(ctx, target) + assert.Contains(t, err.Error(), "invalid keyspace") + + target.Keyspace = "" + target.Shard = "a" + err = sm.StartRequest(ctx, target, false) + assert.Contains(t, err.Error(), "invalid shard") + err = sm.VerifyTarget(ctx, target) + assert.Contains(t, err.Error(), "invalid shard") + + target.Shard = "" + target.TabletType = topodatapb.TabletType_REPLICA + err = sm.StartRequest(ctx, target, false) + assert.Contains(t, err.Error(), "invalid tablet type") + err = sm.VerifyTarget(ctx, target) + assert.Contains(t, err.Error(), "invalid tablet type") + + sm.alsoAllow = []topodatapb.TabletType{topodatapb.TabletType_REPLICA} + err = sm.StartRequest(ctx, target, false) + assert.NoError(t, err) + err = sm.VerifyTarget(ctx, target) + assert.NoError(t, err) + + err = sm.StartRequest(ctx, nil, false) + assert.Contains(t, err.Error(), "No target") + err = sm.VerifyTarget(ctx, nil) + assert.Contains(t, err.Error(), "No target") + + localctx := tabletenv.LocalContext() + err = sm.StartRequest(localctx, nil, false) + assert.NoError(t, err) + err = sm.VerifyTarget(localctx, nil) + assert.NoError(t, err) +} + +func TestStateManagerWaitForRequests(t *testing.T) { + sm := newTestStateManager(t) + target := &querypb.Target{TabletType: topodatapb.TabletType_MASTER} + sm.target = *target + sm.timebombDuration = 10 * time.Second + + _, err := sm.SetServingType(topodatapb.TabletType_MASTER, StateServing, nil) + require.NoError(t, err) + + err = sm.StartRequest(ctx, target, false) + require.NoError(t, err) + + // This will go into transition and wait. + // Wait for that state. + go sm.StopService() + for { + if !sm.isTransitioning() { + time.Sleep(10 * time.Millisecond) + continue + } + break + } + + // Verify that we're still transitioning. + assert.True(t, sm.isTransitioning()) + + sm.EndRequest() + + for { + if sm.isTransitioning() { + time.Sleep(10 * time.Millisecond) + continue + } + break + } + assert.Equal(t, StateNotConnected, sm.State()) +} + +func verifySubcomponent(t *testing.T, order int64, component interface{}, state testState) { + tos := component.(orderState) + assert.Equal(t, order, tos.Order()) + assert.Equal(t, state, tos.State()) +} + +func newTestStateManager(t *testing.T) *stateManager { + order.Set(0) + return &stateManager{ + se: &testSchemaEngine{}, + hw: &testSubcomponent{}, + hr: &testSubcomponent{}, + vstreamer: &testSubcomponent{}, + tracker: &testSubcomponent{}, + watcher: &testSubcomponent{}, + qe: &testQueryEngine{}, + txThrottler: &testTxThrottler{}, + te: &testTxEngine{}, + messager: &testSubcomponent{}, + + transitioning: sync2.NewSemaphore(1, 0), + checkMySQLThrottler: sync2.NewSemaphore(1, 0), + history: history.New(10), + timebombDuration: time.Duration(10 * time.Millisecond), + } +} + +func (sm *stateManager) isTransitioning() bool { + if sm.transitioning.TryAcquire() { + sm.transitioning.Release() + return false + } + return true +} + +var order sync2.AtomicInt64 + +type testState int + +const ( + _ = testState(iota) + testStateOpen + testStateClosed + testStateAcceptReadOnly + testStateAcceptReadWrite +) + +type orderState interface { + Order() int64 + State() testState +} + +type testOrderState struct { + order int64 + state testState +} + +func (tos testOrderState) Order() int64 { + return tos.order +} + +func (tos testOrderState) State() testState { + return tos.state +} + +type testSchemaEngine struct { + testOrderState + nonMaster bool +} + +func (te *testSchemaEngine) Open() error { + te.order = order.Add(1) + te.state = testStateOpen + return nil +} + +func (te *testSchemaEngine) MakeNonMaster() { + te.nonMaster = true +} + +func (te *testSchemaEngine) Close() { + te.order = order.Add(1) + te.state = testStateClosed +} + +type testQueryEngine struct { + testOrderState + isReachable bool + stopServing bool + + failMySQL bool +} + +func (te *testQueryEngine) Open() error { + te.order = order.Add(1) + te.state = testStateOpen + return nil +} + +func (te *testQueryEngine) IsMySQLReachable() error { + if te.failMySQL { + te.failMySQL = false + return errors.New("intentional error") + } + te.isReachable = true + return nil +} + +func (te *testQueryEngine) StopServing() { + te.stopServing = true +} + +func (te *testQueryEngine) Close() { + te.order = order.Add(1) + te.state = testStateClosed +} + +type testTxEngine struct { + testOrderState +} + +func (te *testTxEngine) AcceptReadWrite() error { + te.order = order.Add(1) + te.state = testStateAcceptReadWrite + return nil +} + +func (te *testTxEngine) AcceptReadOnly() error { + te.order = order.Add(1) + te.state = testStateAcceptReadOnly + return nil +} + +func (te *testTxEngine) Close() { + te.order = order.Add(1) + te.state = testStateClosed +} + +type testSubcomponent struct { + testOrderState +} + +func (te *testSubcomponent) Open() { + te.order = order.Add(1) + te.state = testStateOpen +} + +func (te *testSubcomponent) Close() { + te.order = order.Add(1) + te.state = testStateClosed +} + +type testTxThrottler struct { + testOrderState +} + +func (te *testTxThrottler) Open() error { + te.order = order.Add(1) + te.state = testStateOpen + return nil +} + +func (te *testTxThrottler) Close() { + te.order = order.Add(1) + te.state = testStateClosed +} diff --git a/go/vt/vttablet/tabletserver/status.go b/go/vt/vttablet/tabletserver/status.go index aed2c7de858..2e9cfec53e6 100644 --- a/go/vt/vttablet/tabletserver/status.go +++ b/go/vt/vttablet/tabletserver/status.go @@ -182,12 +182,10 @@ type queryserviceStatus struct { // AddStatusHeader registers a standlone header for the status page. func (tsv *TabletServer) AddStatusHeader() { tsv.exporter.AddStatusPart("Tablet Server", headerTemplate, func() interface{} { - tsv.mu.Lock() - defer tsv.mu.Unlock() return map[string]interface{}{ "Alias": tsv.exporter.Name(), "Prefix": tsv.exporter.URLPrefix(), - "Target": tsv.target, + "Target": tsv.sm.Target(), } }) } @@ -196,8 +194,8 @@ func (tsv *TabletServer) AddStatusHeader() { func (tsv *TabletServer) AddStatusPart() { tsv.exporter.AddStatusPart("Queryservice", queryserviceStatusTemplate, func() interface{} { status := queryserviceStatus{ - State: tsv.GetState(), - History: tsv.history.Records(), + State: tsv.sm.StateByName(), + History: tsv.sm.history.Records(), } rates := tsv.stats.QPSRates.Get() if qps, ok := rates["All"]; ok && len(qps) > 0 { diff --git a/go/vt/vttablet/tabletserver/tabletserver.go b/go/vt/vttablet/tabletserver/tabletserver.go index 41dcfd8b4e4..6c9a8865210 100644 --- a/go/vt/vttablet/tabletserver/tabletserver.go +++ b/go/vt/vttablet/tabletserver/tabletserver.go @@ -40,7 +40,6 @@ import ( "vitess.io/vitess/go/trace" "vitess.io/vitess/go/vt/callerid" "vitess.io/vitess/go/vt/dbconfigs" - "vitess.io/vitess/go/vt/dbconnpool" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" @@ -65,60 +64,11 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletserver/vstreamer" ) -const ( - // StateNotConnected is the state where tabletserver is not - // connected to an underlying mysql instance. - StateNotConnected = iota - // StateNotServing is the state where tabletserver is connected - // to an underlying mysql instance, but is not serving queries. - StateNotServing - // StateServing is where queries are allowed. - StateServing - // StateTransitioning is a transient state indicating that - // the tabletserver is tranisitioning to a new state. - // In order to achieve clean transitions, no requests are - // allowed during this state. - StateTransitioning - // StateShuttingDown indicates that the tabletserver - // is shutting down. In this state, we wait for outstanding - // requests and transactions to conclude. - StateShuttingDown -) - // logPoolFull is for throttling transaction / query pool full messages in the log. var logPoolFull = logutil.NewThrottledLogger("PoolFull", 1*time.Minute) var logComputeRowSerializerKey = logutil.NewThrottledLogger("ComputeRowSerializerKey", 1*time.Minute) -// stateName names every state. The number of elements must -// match the number of states. Names can overlap. -var stateName = []string{ - "NOT_SERVING", - "NOT_SERVING", - "SERVING", - "NOT_SERVING", - "SHUTTING_DOWN", -} - -// stateDetail matches every state and optionally more information about the reason -// why the state is serving / not serving. -var stateDetail = []string{ - "Not Connected", - "Not Serving", - "", - "Transitioning", - "Shutting Down", -} - -// stateInfo returns a string representation of the state and optional detail -// about the reason for the state transition -func stateInfo(state int64) string { - if state == StateServing { - return "SERVING" - } - return fmt.Sprintf("%s (%s)", stateName[state], stateDetail[state]) -} - // TabletServer implements the RPC interface for the query service. // TabletServer is initialized in the following sequence: // NewTabletServer->InitDBConfig->SetServingType. @@ -141,36 +91,22 @@ type TabletServer struct { TerseErrors bool enableHotRowProtection bool topoServer *topo.Server - checkMySQLThrottler *sync2.Semaphore - - // mu is used to access state. The lock should only be held - // for short periods. For longer periods, you have to transition - // the state to a transient value and release the lock. - // Once the operation is complete, you can then transition - // the state back to a stable value. - // The lameduck mode causes tablet server to respond as unhealthy - // for health checks. This does not affect how queries are served. - // target specifies the primary target type, and also allow specifies - // secondary types that should be additionally allowed. - mu sync.Mutex - state int64 - lameduck sync2.AtomicInt32 - target querypb.Target - alsoAllow []topodatapb.TabletType - requests sync.WaitGroup // These are sub-components of TabletServer. se *schema.Engine - qe *QueryEngine - te *TxEngine hw *heartbeat.Writer hr *heartbeat.Reader vstreamer *vstreamer.Engine tracker *schema.Tracker watcher *ReplicationWatcher + qe *QueryEngine txThrottler *txthrottler.TxThrottler + te *TxEngine messager *messager.Engine + // sm manages state transitions. + sm *stateManager + // streamHealthMutex protects all the following fields streamHealthMutex sync.Mutex streamHealthIndex int @@ -178,10 +114,6 @@ type TabletServer struct { lastStreamHealthResponse *querypb.StreamHealthResponse lastStreamHealthExpiration time.Time - // history records changes in state for display on the status page. - // It has its own internal mutex. - history *history.History - // alias is used for identifying this tabletserver in healthcheck responses. alias topodatapb.TabletAlias } @@ -215,40 +147,48 @@ func NewTabletServer(name string, config *tabletenv.TabletConfig, topoServer *to TerseErrors: config.TerseErrors, enableHotRowProtection: config.HotRowProtection.Mode != tabletenv.Disable, topoServer: topoServer, - checkMySQLThrottler: sync2.NewSemaphore(1, 0), streamHealthMap: make(map[int]chan<- *querypb.StreamHealthResponse), - history: history.New(10), alias: alias, } tsOnce.Do(func() { srvTopoServer = srvtopo.NewResilientServer(topoServer, "TabletSrvTopo") }) - // The following services should generally be opened in the order - // of initialization below, and closed in reverse order. - // However, gracefulStop is slightly different because only - // some services must be closed, while others should remain open. tsv.se = schema.NewEngine(tsv) - tsv.qe = NewQueryEngine(tsv, tsv.se) - tsv.te = NewTxEngine(tsv) tsv.hw = heartbeat.NewWriter(tsv, alias) tsv.hr = heartbeat.NewReader(tsv) - tsv.vstreamer = vstreamer.NewEngine(tsv, srvTopoServer, tsv.se) + tsv.vstreamer = vstreamer.NewEngine(tsv, srvTopoServer, tsv.se, alias.Cell) tsv.tracker = schema.NewTracker(tsv, tsv.vstreamer, tsv.se) tsv.watcher = NewReplicationWatcher(tsv, tsv.vstreamer, tsv.config) + tsv.qe = NewQueryEngine(tsv, tsv.se) tsv.txThrottler = txthrottler.NewTxThrottler(tsv.config, topoServer) + tsv.te = NewTxEngine(tsv) tsv.messager = messager.NewEngine(tsv, tsv.se, tsv.vstreamer) - tsv.exporter.NewGaugeFunc("TabletState", "Tablet server state", func() int64 { - tsv.mu.Lock() - defer tsv.mu.Unlock() - return tsv.state - }) - tsv.exporter.Publish("TabletStateName", stats.StringFunc(tsv.GetState)) + tsv.sm = &stateManager{ + se: tsv.se, + hw: tsv.hw, + hr: tsv.hr, + vstreamer: tsv.vstreamer, + tracker: tsv.tracker, + watcher: tsv.watcher, + qe: tsv.qe, + txThrottler: tsv.txThrottler, + te: tsv.te, + messager: tsv.messager, + + transitioning: sync2.NewSemaphore(1, 0), + checkMySQLThrottler: sync2.NewSemaphore(1, 0), + history: history.New(10), + timebombDuration: time.Duration(config.OltpReadPool.TimeoutSeconds * 10), + } + + tsv.exporter.NewGaugeFunc("TabletState", "Tablet server state", func() int64 { return int64(tsv.sm.State()) }) + tsv.exporter.Publish("TabletStateName", stats.StringFunc(tsv.sm.StateByName)) // TabletServerState exports the same information as the above two stats (TabletState / TabletStateName), // but exported with TabletStateName as a label for Prometheus, which doesn't support exporting strings as stat values. tsv.exporter.NewGaugesFuncWithMultiLabels("TabletServerState", "Tablet server state labeled by state name", []string{"name"}, func() map[string]int64 { - return map[string]int64{tsv.GetState(): 1} + return map[string]int64{tsv.sm.StateByName(): 1} }) tsv.exporter.NewGaugeDurationFunc("QueryTimeout", "Tablet server query timeout", tsv.QueryTimeout.Get) @@ -259,16 +199,21 @@ func NewTabletServer(name string, config *tabletenv.TabletConfig, topoServer *to return tsv } -// SetTracking forces tracking to be on or off. -// Only to be used for testing. -func (tsv *TabletServer) SetTracking(enabled bool) { - tsv.tracker.Enable(enabled) -} +// InitDBConfig initializes the db config variables for TabletServer. You must call this function +// to complete the creation of TabletServer. +func (tsv *TabletServer) InitDBConfig(target querypb.Target, dbcfgs *dbconfigs.DBConfigs) error { + if tsv.sm.State() != StateNotConnected { + return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "InitDBConfig failed, current state: %s", tsv.sm.StateByName()) + } + tsv.sm.target = target + tsv.config.DB = dbcfgs -// EnableHistorian forces historian to be on or off. -// Only to be used for testing. -func (tsv *TabletServer) EnableHistorian(enabled bool) { - _ = tsv.se.EnableHistorian(enabled) + tsv.se.InitDBConfig(tsv.config.DB.DbaWithDB()) + tsv.hw.InitDBConfig(target) + tsv.hr.InitDBConfig(target) + tsv.txThrottler.InitDBConfig(target) + tsv.vstreamer.InitDBConfig(target.Keyspace) + return nil } // Register prepares TabletServer for serving by calling @@ -322,56 +267,6 @@ func (tsv *TabletServer) SetQueryRules(ruleSource string, qrs *rules.Rules) erro return nil } -// GetState returns the name of the current TabletServer state. -func (tsv *TabletServer) GetState() string { - if tsv.lameduck.Get() != 0 { - return "NOT_SERVING" - } - tsv.mu.Lock() - name := stateName[tsv.state] - tsv.mu.Unlock() - return name -} - -// setState changes the state and logs the event. -// It requires the caller to hold a lock on mu. -func (tsv *TabletServer) setState(state int64) { - log.Infof("TabletServer state: %s -> %s", stateInfo(tsv.state), stateInfo(state)) - tsv.state = state - tsv.history.Add(&historyRecord{ - Time: time.Now(), - ServingState: stateInfo(state), - TabletType: tsv.target.TabletType.String(), - }) -} - -// transition obtains a lock and changes the state. -func (tsv *TabletServer) transition(newState int64) { - tsv.mu.Lock() - tsv.setState(newState) - tsv.mu.Unlock() -} - -// IsServing returns true if TabletServer is in SERVING state. -func (tsv *TabletServer) IsServing() bool { - return tsv.GetState() == "SERVING" -} - -// InitDBConfig initializes the db config variables for TabletServer. You must call this function before -// calling SetServingType. -func (tsv *TabletServer) InitDBConfig(target querypb.Target, dbcfgs *dbconfigs.DBConfigs) error { - tsv.mu.Lock() - defer tsv.mu.Unlock() - if tsv.state != StateNotConnected { - return vterrors.Errorf(vtrpcpb.Code_UNKNOWN, "InitDBConfig failed, current state: %s", stateName[tsv.state]) - } - tsv.target = target - tsv.config.DB = dbcfgs - - tsv.se.InitDBConfig(tsv.config.DB.DbaWithDB()) - return nil -} - func (tsv *TabletServer) initACL(tableACLConfigFile string, enforceTableACLConfig bool) { // tabletacl.Init loads ACL from file if *tableACLConfig is not empty err := tableacl.Init( @@ -410,171 +305,27 @@ func (tsv *TabletServer) InitACL(tableACLConfigFile string, enforceTableACLConfi } } -// StartService is a convenience function for InitDBConfig->SetServingType -// with serving=true. -func (tsv *TabletServer) StartService(target querypb.Target, dbcfgs *dbconfigs.DBConfigs) (err error) { - // Save tablet type away to prevent data races - tabletType := target.TabletType - err = tsv.InitDBConfig(target, dbcfgs) - if err != nil { - return err - } - _ /* state changed */, err = tsv.SetServingType(tabletType, true, nil) - return err -} - -// EnterLameduck causes tabletserver to enter the lameduck state. This -// state causes health checks to fail, but the behavior of tabletserver -// otherwise remains the same. Any subsequent calls to SetServingType will -// cause the tabletserver to exit this mode. -func (tsv *TabletServer) EnterLameduck() { - tsv.lameduck.Set(1) -} - -// ExitLameduck causes the tabletserver to exit the lameduck mode. -func (tsv *TabletServer) ExitLameduck() { - tsv.lameduck.Set(0) -} - -const ( - actionNone = iota - actionFullStart - actionServeNewType - actionGracefulStop -) - // SetServingType changes the serving type of the tabletserver. It starts or // stops internal services as deemed necessary. The tabletType determines the // primary serving type, while alsoAllow specifies other tablet types that // should also be honored for serving. // Returns true if the state of QueryService or the tablet type changed. func (tsv *TabletServer) SetServingType(tabletType topodatapb.TabletType, serving bool, alsoAllow []topodatapb.TabletType) (stateChanged bool, err error) { - defer tsv.ExitLameduck() - - action, err := tsv.decideAction(tabletType, serving, alsoAllow) - if err != nil { - return false, err - } - switch action { - case actionNone: - return false, nil - case actionFullStart: - if err := tsv.fullStart(); err != nil { - tsv.closeAll() - return true, err - } - return true, nil - case actionServeNewType: - if err := tsv.serveNewType(); err != nil { - tsv.closeAll() - return true, err - } - return true, nil - case actionGracefulStop: - tsv.gracefulStop() - return true, nil - } - panic("unreachable") -} - -func (tsv *TabletServer) decideAction(tabletType topodatapb.TabletType, serving bool, alsoAllow []topodatapb.TabletType) (action int, err error) { - tsv.mu.Lock() - defer tsv.mu.Unlock() - - tsv.alsoAllow = alsoAllow - - // Handle the case where the requested TabletType and serving state - // match our current state. This avoids an unnecessary transition. - // There's no similar shortcut if serving is false, because there - // are different 'not serving' states that require different actions. - if tsv.target.TabletType == tabletType { - if serving && tsv.state == StateServing { - // We're already in the desired state. - return actionNone, nil - } - } - tsv.target.TabletType = tabletType - switch tsv.state { - case StateNotConnected: - if serving { - tsv.setState(StateTransitioning) - return actionFullStart, nil - } - case StateNotServing: - if serving { - tsv.setState(StateTransitioning) - return actionServeNewType, nil - } - case StateServing: - if !serving { - tsv.setState(StateShuttingDown) - return actionGracefulStop, nil - } - tsv.setState(StateTransitioning) - return actionServeNewType, nil - case StateTransitioning, StateShuttingDown: - return actionNone, vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "cannot SetServingType, current state: %s", stateName[tsv.state]) - default: - panic("unreachable") + state := StateNotServing + if serving { + state = StateServing } - return actionNone, nil + return tsv.sm.SetServingType(tabletType, state, alsoAllow) } -func (tsv *TabletServer) fullStart() (err error) { - c, err := dbconnpool.NewDBConnection(context.TODO(), tsv.config.DB.AppWithDB()) - if err != nil { - log.Errorf("error creating db app connection: %v", err) - return err - } - c.Close() - - if err := tsv.se.Open(); err != nil { - log.Errorf("Could not load historian, but starting the query service anyways: %v", err) - } - if err := tsv.qe.Open(); err != nil { - return err - } - if err := tsv.te.Init(); err != nil { - return err - } - if err := tsv.hw.Init(tsv.target); err != nil { +// StartService is a convenience function for InitDBConfig->SetServingType +// with serving=true. +func (tsv *TabletServer) StartService(target querypb.Target, dbcfgs *dbconfigs.DBConfigs) error { + if err := tsv.InitDBConfig(target, dbcfgs); err != nil { return err } - tsv.hr.Init(tsv.target) - tsv.vstreamer.Open(tsv.target.Keyspace, tsv.alias.Cell) - return tsv.serveNewType() -} - -func (tsv *TabletServer) serveNewType() (err error) { - if tsv.target.TabletType == topodatapb.TabletType_MASTER { - tsv.watcher.Close() - tsv.hr.Close() - - tsv.te.AcceptReadWrite() - tsv.hw.Open() - tsv.tracker.Open() - if err := tsv.txThrottler.Open(tsv.target.Keyspace, tsv.target.Shard); err != nil { - return err - } - tsv.messager.Open() - } else { - tsv.messager.Close() - tsv.tracker.Close() - tsv.hw.Close() - tsv.se.MakeNonMaster() - - tsv.te.AcceptReadOnly() - tsv.hr.Open() - tsv.watcher.Open() - } - tsv.transition(StateServing) - return nil -} - -func (tsv *TabletServer) gracefulStop() { - defer close(tsv.setTimeBomb()) - tsv.waitForShutdown() - tsv.transition(StateNotServing) + _, err := tsv.sm.SetServingType(target.TabletType, StateServing, nil) + return err } // StopService shuts down the tabletserver to the uninitialized state. @@ -583,81 +334,14 @@ func (tsv *TabletServer) gracefulStop() { // should be called before process termination, or if MySQL is unreachable. // Under normal circumstances, SetServingType should be called. func (tsv *TabletServer) StopService() { - defer close(tsv.setTimeBomb()) - defer tsv.LogError() - - tsv.mu.Lock() - if tsv.state != StateServing && tsv.state != StateNotServing { - tsv.mu.Unlock() - return - } - tsv.setState(StateShuttingDown) - tsv.mu.Unlock() - - log.Info("Executing complete shutdown.") - tsv.waitForShutdown() - tsv.tracker.Close() - tsv.vstreamer.Close() - tsv.hr.Close() - tsv.hw.Close() - tsv.qe.Close() - tsv.se.Close() - log.Info("Shutdown complete.") - tsv.transition(StateNotConnected) -} - -func (tsv *TabletServer) waitForShutdown() { - tsv.messager.Close() - tsv.txThrottler.Close() - tsv.watcher.Close() - tsv.te.Close() - tsv.qe.streamQList.TerminateAll() - tsv.requests.Wait() -} - -// closeAll is called if TabletServer fails to start. -// It forcibly shuts down everything. -func (tsv *TabletServer) closeAll() { - tsv.messager.Close() - tsv.txThrottler.Close() - tsv.watcher.Close() - tsv.tracker.Close() - tsv.vstreamer.Close() - tsv.hr.Close() - tsv.hw.Close() - tsv.te.Close() - tsv.qe.streamQList.TerminateAll() - tsv.qe.Close() - tsv.se.Close() - tsv.transition(StateNotConnected) -} - -func (tsv *TabletServer) setTimeBomb() chan struct{} { - done := make(chan struct{}) - go func() { - qt := tsv.QueryTimeout.Get() - if qt == 0 { - return - } - tmr := time.NewTimer(10 * qt) - defer tmr.Stop() - select { - case <-tmr.C: - log.Fatal("Shutdown took too long. Crashing") - case <-done: - } - }() - return done + tsv.sm.StopService() } // IsHealthy returns nil for non-serving types or if the query service is healthy (able to // connect to the database and serving traffic), or an error explaining // the unhealthiness otherwise. func (tsv *TabletServer) IsHealthy() error { - tsv.mu.Lock() - tabletType := tsv.target.TabletType - tsv.mu.Unlock() - switch tabletType { + switch tsv.sm.Target().TabletType { case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA, topodatapb.TabletType_BATCH, topodatapb.TabletType_EXPERIMENTAL: _, err := tsv.Execute( tabletenv.LocalContext(), @@ -674,54 +358,6 @@ func (tsv *TabletServer) IsHealthy() error { } } -// CheckMySQL initiates a check to see if MySQL is reachable. -// If not, it shuts down the query service. The check is rate-limited -// to no more than once per second. -// The function satisfies tabletenv.Env. -func (tsv *TabletServer) CheckMySQL() { - if !tsv.checkMySQLThrottler.TryAcquire() { - return - } - go func() { - defer func() { - tsv.LogError() - time.Sleep(1 * time.Second) - tsv.checkMySQLThrottler.Release() - }() - if tsv.isMySQLReachable() { - return - } - log.Info("Check MySQL failed. Shutting down query service") - tsv.StopService() - }() -} - -// isMySQLReachable returns true if we can connect to MySQL. -// The function returns false only if the query service is -// in StateServing or StateNotServing. -func (tsv *TabletServer) isMySQLReachable() bool { - tsv.mu.Lock() - switch tsv.state { - case StateServing: - // Prevent transition out of this state by - // reserving a request. - tsv.requests.Add(1) - defer tsv.requests.Done() - case StateNotServing: - // Prevent transition out of this state by - // temporarily switching to StateTransitioning. - tsv.setState(StateTransitioning) - defer func() { - tsv.transition(StateNotServing) - }() - default: - tsv.mu.Unlock() - return true - } - tsv.mu.Unlock() - return tsv.qe.IsMySQLReachable() -} - // ReloadSchema reloads the schema. func (tsv *TabletServer) ReloadSchema(ctx context.Context) error { return tsv.se.Reload(ctx) @@ -1078,9 +714,9 @@ func (tsv *TabletServer) ExecuteBatch(ctx context.Context, target *querypb.Targe if tsv.enableHotRowProtection && asTransaction { // Serialize transactions which target the same hot row range. - // NOTE: We put this intentionally at this place *before* tsv.startRequest() - // gets called below. Otherwise, the startRequest()/endRequest() section from - // below would overlap with the startRequest()/endRequest() section executed + // NOTE: We put this intentionally at this place *before* StartRequest() + // gets called below. Otherwise, the StartRequest()/EndRequest() section from + // below would overlap with the StartRequest()/EndRequest() section executed // by tsv.beginWaitForSameRangeTransactions(). txDone, err := tsv.beginWaitForSameRangeTransactions(ctx, target, options, queries[0].Sql, queries[0].BindVariables) if err != nil { @@ -1092,16 +728,16 @@ func (tsv *TabletServer) ExecuteBatch(ctx context.Context, target *querypb.Targe } allowOnShutdown := transactionID != 0 - // TODO(sougou): Convert startRequest/endRequest pattern to use wrapper + // TODO(sougou): Convert StartRequest/EndRequest pattern to use wrapper // function tsv.execRequest() instead. // Note that below we always return "err" right away and do not call // tsv.convertAndLogError. That's because the methods which returned "err", // e.g. tsv.Execute(), already called that function and therefore already // converted and logged the error. - if err = tsv.startRequest(ctx, target, allowOnShutdown); err != nil { + if err = tsv.sm.StartRequest(ctx, target, allowOnShutdown); err != nil { return nil, err } - defer tsv.endRequest() + defer tsv.sm.EndRequest() defer tsv.handlePanicAndSendLogStats("batch", nil, nil) if options == nil { @@ -1322,10 +958,10 @@ func (tsv *TabletServer) PurgeMessages(ctx context.Context, target *querypb.Targ } func (tsv *TabletServer) execDML(ctx context.Context, target *querypb.Target, queryGenerator func() (string, map[string]*querypb.BindVariable, error)) (count int64, err error) { - if err = tsv.startRequest(ctx, target, false /* allowOnShutdown */); err != nil { + if err = tsv.sm.StartRequest(ctx, target, false /* allowOnShutdown */); err != nil { return 0, err } - defer tsv.endRequest() + defer tsv.sm.EndRequest() defer tsv.handlePanicAndSendLogStats("ack", nil, nil) query, bv, err := queryGenerator() @@ -1358,7 +994,7 @@ func (tsv *TabletServer) execDML(ctx context.Context, target *querypb.Target, qu // VStream streams VReplication events. func (tsv *TabletServer) VStream(ctx context.Context, target *querypb.Target, startPos string, tablePKs []*binlogdatapb.TableLastPK, filter *binlogdatapb.Filter, send func([]*binlogdatapb.VEvent) error) error { - if err := tsv.verifyTarget(ctx, target); err != nil { + if err := tsv.sm.VerifyTarget(ctx, target); err != nil { return err } return tsv.vstreamer.Stream(ctx, startPos, tablePKs, filter, send) @@ -1366,7 +1002,7 @@ func (tsv *TabletServer) VStream(ctx context.Context, target *querypb.Target, st // VStreamRows streams rows from the specified starting point. func (tsv *TabletServer) VStreamRows(ctx context.Context, target *querypb.Target, query string, lastpk *querypb.QueryResult, send func(*binlogdatapb.VStreamRowsResponse) error) error { - if err := tsv.verifyTarget(ctx, target); err != nil { + if err := tsv.sm.VerifyTarget(ctx, target); err != nil { return err } var row []sqltypes.Value @@ -1382,7 +1018,7 @@ func (tsv *TabletServer) VStreamRows(ctx context.Context, target *querypb.Target // VStreamResults streams rows from the specified starting point. func (tsv *TabletServer) VStreamResults(ctx context.Context, target *querypb.Target, query string, send func(*binlogdatapb.VStreamResultsResponse) error) error { - if err := tsv.verifyTarget(ctx, target); err != nil { + if err := tsv.sm.VerifyTarget(ctx, target); err != nil { return err } return tsv.vstreamer.StreamResults(ctx, query, send) @@ -1496,14 +1132,14 @@ func (tsv *TabletServer) execRequest( logStats.OriginalSQL = sql logStats.BindVariables = bindVariables defer tsv.handlePanicAndSendLogStats(sql, bindVariables, logStats) - if err = tsv.startRequest(ctx, target, allowOnShutdown); err != nil { + if err = tsv.sm.StartRequest(ctx, target, allowOnShutdown); err != nil { return err } ctx, cancel := withTimeout(ctx, timeout, options) defer func() { cancel() - tsv.endRequest() + tsv.sm.EndRequest() }() err = exec(ctx, logStats) @@ -1513,32 +1149,6 @@ func (tsv *TabletServer) execRequest( return nil } -// verifyTarget allows requests to be executed even in non-serving state. -func (tsv *TabletServer) verifyTarget(ctx context.Context, target *querypb.Target) error { - tsv.mu.Lock() - defer tsv.mu.Unlock() - - if target != nil { - // a valid target needs to be used - switch { - case target.Keyspace != tsv.target.Keyspace: - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid keyspace %v", target.Keyspace) - case target.Shard != tsv.target.Shard: - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid shard %v", target.Shard) - case target.TabletType != tsv.target.TabletType: - for _, otherType := range tsv.alsoAllow { - if target.TabletType == otherType { - return nil - } - } - return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "invalid tablet type: %v, want: %v or %v", target.TabletType, tsv.target.TabletType, tsv.alsoAllow) - } - } else if !tabletenv.IsLocalContext(ctx) { - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "No target") - } - return nil -} - func (tsv *TabletServer) handlePanicAndSendLogStats( sql string, bindVariables map[string]*querypb.BindVariable, @@ -1801,9 +1411,7 @@ func (tsv *TabletServer) streamHealthUnregister(id int) { // BroadcastHealth will broadcast the current health to all listeners func (tsv *TabletServer) BroadcastHealth(terTimestamp int64, stats *querypb.RealtimeStats, maxCache time.Duration) { - tsv.mu.Lock() - target := tsv.target - tsv.mu.Unlock() + target := tsv.sm.Target() shr := &querypb.StreamHealthResponse{ Target: &target, TabletAlias: &tsv.alias, @@ -1840,6 +1448,32 @@ func (tsv *TabletServer) HeartbeatLag() (time.Duration, error) { return tsv.hr.GetLatest() } +// EnterLameduck causes tabletserver to enter the lameduck state. This +// state causes health checks to fail, but the behavior of tabletserver +// otherwise remains the same. Any subsequent calls to SetServingType will +// cause the tabletserver to exit this mode. +func (tsv *TabletServer) EnterLameduck() { + tsv.sm.EnterLameduck() +} + +// ExitLameduck causes the tabletserver to exit the lameduck mode. +func (tsv *TabletServer) ExitLameduck() { + tsv.sm.ExitLameduck() +} + +// IsServing returns true if TabletServer is in SERVING state. +func (tsv *TabletServer) IsServing() bool { + return tsv.sm.IsServing() +} + +// CheckMySQL initiates a check to see if MySQL is reachable. +// If not, it shuts down the query service. The check is rate-limited +// to no more than once per second. +// The function satisfies tabletenv.Env. +func (tsv *TabletServer) CheckMySQL() { + tsv.sm.CheckMySQL() +} + // TopoServer returns the topo server. func (tsv *TabletServer) TopoServer() *topo.Server { return tsv.topoServer @@ -1857,52 +1491,6 @@ func (tsv *TabletServer) Close(ctx context.Context) error { return nil } -// startRequest validates the current state and target and registers -// the request (a waitgroup) as started. Every startRequest requires -// one and only one corresponding endRequest. When the service shuts -// down, StopService will wait on this waitgroup to ensure that there -// are no requests in flight. -func (tsv *TabletServer) startRequest(ctx context.Context, target *querypb.Target, allowOnShutdown bool) (err error) { - tsv.mu.Lock() - defer tsv.mu.Unlock() - if tsv.state == StateServing { - goto verifyTarget - } - if allowOnShutdown && tsv.state == StateShuttingDown { - goto verifyTarget - } - return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "operation not allowed in state %s", stateName[tsv.state]) - -verifyTarget: - if target != nil { - // a valid target needs to be used - switch { - case target.Keyspace != tsv.target.Keyspace: - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid keyspace %v", target.Keyspace) - case target.Shard != tsv.target.Shard: - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "invalid shard %v", target.Shard) - case target.TabletType != tsv.target.TabletType: - for _, otherType := range tsv.alsoAllow { - if target.TabletType == otherType { - goto ok - } - } - return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "invalid tablet type: %v, want: %v or %v", target.TabletType, tsv.target.TabletType, tsv.alsoAllow) - } - } else if !tabletenv.IsLocalContext(ctx) { - return vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "No target") - } - -ok: - tsv.requests.Add(1) - return nil -} - -// endRequest unregisters the current request (a waitgroup) as done. -func (tsv *TabletServer) endRequest() { - tsv.requests.Done() -} - func (tsv *TabletServer) registerDebugHealthHandler() { tsv.exporter.HandleFunc("/debug/health", func(w http.ResponseWriter, r *http.Request) { if err := acl.CheckAccessHTTP(r, acl.MONITORING); err != nil { @@ -1945,6 +1533,18 @@ func (tsv *TabletServer) registerTwopczHandler() { }) } +// SetTracking forces tracking to be on or off. +// Only to be used for testing. +func (tsv *TabletServer) SetTracking(enabled bool) { + tsv.tracker.Enable(enabled) +} + +// EnableHistorian forces historian to be on or off. +// Only to be used for testing. +func (tsv *TabletServer) EnableHistorian(enabled bool) { + _ = tsv.se.EnableHistorian(enabled) +} + // SetPoolSize changes the pool size to the specified value. // This function should only be used for testing. func (tsv *TabletServer) SetPoolSize(val int) { diff --git a/go/vt/vttablet/tabletserver/tabletserver_test.go b/go/vt/vttablet/tabletserver/tabletserver_test.go index 4ef64959352..718d08e9de9 100644 --- a/go/vt/vttablet/tabletserver/tabletserver_test.go +++ b/go/vt/vttablet/tabletserver/tabletserver_test.go @@ -32,16 +32,15 @@ import ( "vitess.io/vitess/go/vt/callerid" + "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/test/utils" "github.com/stretchr/testify/assert" - "github.com/golang/protobuf/proto" "github.com/stretchr/testify/require" "golang.org/x/net/context" "vitess.io/vitess/go/mysql" - "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/sqlparser" @@ -56,426 +55,37 @@ import ( vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" ) -func TestTabletServerGetState(t *testing.T) { - states := []int64{ - StateNotConnected, - StateNotServing, - StateServing, - StateTransitioning, - StateShuttingDown, - } - // Don't reuse stateName. - names := []string{ - "NOT_SERVING", - "NOT_SERVING", - "SERVING", - "NOT_SERVING", - "SHUTTING_DOWN", - } - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - for i, state := range states { - tsv.setState(state) - require.Equal(t, names[i], tsv.GetState(), "GetState") - } - tsv.EnterLameduck() - require.Equal(t, "NOT_SERVING", tsv.GetState(), "GetState") -} - -func TestTabletServerAllowQueriesFailBadConn(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - db.EnableConnFail() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - checkTabletServerState(t, tsv, StateNotConnected) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.Error(t, err, "TabletServer.StartService should fail") - checkTabletServerState(t, tsv, StateNotConnected) -} - -func TestTabletServerAllowQueries(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - checkTabletServerState(t, tsv, StateNotConnected) - dbcfgs := newDBConfigs(db) - tsv.setState(StateServing) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - tsv.StopService() - want := "InitDBConfig failed" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - tsv.setState(StateShuttingDown) - err = tsv.StartService(target, dbcfgs) - require.Error(t, err, "TabletServer.StartService should fail") - tsv.StopService() -} - -func TestTabletServerInitDBConfig(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - tsv.setState(StateServing) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - dbcfgs := newDBConfigs(db) - err := tsv.InitDBConfig(target, dbcfgs) - want := "InitDBConfig failed" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - tsv.setState(StateNotConnected) - err = tsv.InitDBConfig(target, dbcfgs) - require.NoError(t, err) -} - -func TestDecideAction(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - dbcfgs := newDBConfigs(db) - err := tsv.InitDBConfig(target, dbcfgs) - require.NoError(t, err) - - tsv.setState(StateNotConnected) - action, err := tsv.decideAction(topodatapb.TabletType_MASTER, false, nil) - require.NoError(t, err) - if action != actionNone { - t.Errorf("decideAction: %v, want %v", action, actionNone) - } - - tsv.setState(StateNotConnected) - action, err = tsv.decideAction(topodatapb.TabletType_MASTER, true, nil) - require.NoError(t, err) - if action != actionFullStart { - t.Errorf("decideAction: %v, want %v", action, actionFullStart) - } - if tsv.state != StateTransitioning { - t.Errorf("tsv.state: %v, want %v", tsv.state, StateTransitioning) - } - - tsv.setState(StateNotServing) - action, err = tsv.decideAction(topodatapb.TabletType_MASTER, false, nil) - require.NoError(t, err) - if action != actionNone { - t.Errorf("decideAction: %v, want %v", action, actionNone) - } - - tsv.setState(StateNotServing) - action, err = tsv.decideAction(topodatapb.TabletType_MASTER, true, nil) - require.NoError(t, err) - if action != actionServeNewType { - t.Errorf("decideAction: %v, want %v", action, actionServeNewType) - } - if tsv.state != StateTransitioning { - t.Errorf("tsv.state: %v, want %v", tsv.state, StateTransitioning) - } - - tsv.setState(StateServing) - action, err = tsv.decideAction(topodatapb.TabletType_MASTER, false, nil) - require.NoError(t, err) - if action != actionGracefulStop { - t.Errorf("decideAction: %v, want %v", action, actionGracefulStop) - } - if tsv.state != StateShuttingDown { - t.Errorf("tsv.state: %v, want %v", tsv.state, StateShuttingDown) - } - - tsv.setState(StateServing) - action, err = tsv.decideAction(topodatapb.TabletType_REPLICA, true, nil) - require.NoError(t, err) - if action != actionServeNewType { - t.Errorf("decideAction: %v, want %v", action, actionServeNewType) - } - if tsv.state != StateTransitioning { - t.Errorf("tsv.state: %v, want %v", tsv.state, StateTransitioning) - } - tsv.target.TabletType = topodatapb.TabletType_MASTER - - tsv.setState(StateServing) - action, err = tsv.decideAction(topodatapb.TabletType_MASTER, true, nil) - require.NoError(t, err) - if action != actionNone { - t.Errorf("decideAction: %v, want %v", action, actionNone) - } - if tsv.state != StateServing { - t.Errorf("tsv.state: %v, want %v", tsv.state, StateServing) - } - - tsv.setState(StateTransitioning) - _, err = tsv.decideAction(topodatapb.TabletType_MASTER, false, nil) - want := "cannot SetServingType" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - - tsv.setState(StateShuttingDown) - _, err = tsv.decideAction(topodatapb.TabletType_MASTER, false, nil) - want = "cannot SetServingType" - require.Error(t, err) - assert.Contains(t, err.Error(), want) -} - -func TestSetServingType(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.InitDBConfig(target, dbcfgs) - require.NoError(t, err) - - stateChanged, err := tsv.SetServingType(topodatapb.TabletType_REPLICA, false, nil) - if stateChanged != false { - t.Errorf("SetServingType() should NOT have changed the QueryService state, but did") - } - require.NoError(t, err) - checkTabletServerState(t, tsv, StateNotConnected) - - stateChanged, err = tsv.SetServingType(topodatapb.TabletType_REPLICA, true, nil) - if stateChanged != true { - t.Errorf("SetServingType() should have changed the QueryService state, but did not") - } - require.NoError(t, err) - checkTabletServerState(t, tsv, StateServing) - - stateChanged, err = tsv.SetServingType(topodatapb.TabletType_RDONLY, true, nil) - if stateChanged != true { - t.Errorf("SetServingType() should have changed the tablet type, but did not") - } - require.NoError(t, err) - checkTabletServerState(t, tsv, StateServing) - - stateChanged, err = tsv.SetServingType(topodatapb.TabletType_SPARE, false, nil) - if stateChanged != true { - t.Errorf("SetServingType() should have changed the QueryService state, but did not") - } - require.NoError(t, err) - checkTabletServerState(t, tsv, StateNotServing) - - // Verify that we exit lameduck when SetServingType is called. - tsv.EnterLameduck() - if stateName := tsv.GetState(); stateName != "NOT_SERVING" { - t.Errorf("GetState: %s, want NOT_SERVING", stateName) - } - stateChanged, err = tsv.SetServingType(topodatapb.TabletType_REPLICA, true, nil) - if stateChanged != true { - t.Errorf("SetServingType() should have changed the QueryService state, but did not") - } - require.NoError(t, err) - checkTabletServerState(t, tsv, StateServing) - if stateName := tsv.GetState(); stateName != "SERVING" { - t.Errorf("GetState: %s, want SERVING", stateName) - } - - tsv.StopService() - checkTabletServerState(t, tsv, StateNotConnected) -} - -func TestTabletServerSingleSchemaFailure(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - - want := &sqltypes.Result{ - Fields: mysql.BaseShowTablesFields, - Rows: [][]sqltypes.Value{ - mysql.BaseShowTablesRow("test_table", false, ""), - // Return a table that tabletserver can't access (the mock will reject all queries to it). - mysql.BaseShowTablesRow("rejected_table", false, ""), - }, - } - db.AddQuery(mysql.BaseShowTables, want) - - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - defer tsv.StopService() - if err != nil { - t.Fatalf("TabletServer should successfully start even if a table's schema is unloadable, but got error: %v", err) - } -} - -func TestTabletServerCheckMysql(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - defer tsv.StopService() - require.NoError(t, err) - if !tsv.isMySQLReachable() { - t.Error("isMySQLReachable should return true") - } - stateChanged, err := tsv.SetServingType(topodatapb.TabletType_SPARE, false, nil) - require.NoError(t, err) - if stateChanged != true { - t.Errorf("SetServingType() should have changed the QueryService state, but did not") - } - if !tsv.isMySQLReachable() { - t.Error("isMySQLReachable should return true") - } - checkTabletServerState(t, tsv, StateNotServing) -} - -func TestTabletServerReconnect(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - query := "select addr from test_table where pk = 1 limit 1000" - want := &sqltypes.Result{} - db.AddQuery(query, want) - db.AddQuery("select addr from test_table where 1 != 1", &sqltypes.Result{}) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) +func TestBeginOnReplica(t *testing.T) { + db, tsv := setupTabletServerTest(t) defer tsv.StopService() - - if tsv.GetState() != "SERVING" { - t.Errorf("GetState: %s, must be SERVING", tsv.GetState()) - } - if err != nil { - t.Fatalf("TabletServer.StartService should success but get error: %v", err) - } - _, err = tsv.Execute(context.Background(), &target, query, nil, 0, 0, nil) - require.NoError(t, err) - - // make mysql conn fail - db.Close() - _, err = tsv.Execute(context.Background(), &target, query, nil, 0, 0, nil) - if err == nil { - t.Error("Execute: want error, got nil") - } - time.Sleep(50 * time.Millisecond) - if tsv.GetState() == "SERVING" { - t.Error("GetState is still SERVING, must be NOT_SERVING") - } - - // make mysql conn work - db = setUpTabletServerTest(t) - db.AddQuery(query, want) - db.AddQuery("select addr from test_table where 1 != 1", &sqltypes.Result{}) - dbcfgs = newDBConfigs(db) - err = tsv.StartService(target, dbcfgs) - require.NoError(t, err) - _, err = tsv.Execute(context.Background(), &target, query, nil, 0, 0, nil) - require.NoError(t, err) -} - -func TestTabletServerTarget(t *testing.T) { - db := setUpTabletServerTest(t) defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target1 := querypb.Target{ - Keyspace: "test_keyspace", - Shard: "test_shard", - TabletType: topodatapb.TabletType_MASTER, - } - err := tsv.StartService(target1, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() - // query that works - db.AddQuery("select * from test_table limit 1000", &sqltypes.Result{}) - _, err = tsv.Execute(ctx, &target1, "select * from test_table limit 1000", nil, 0, 0, nil) - require.NoError(t, err) - - // wrong tablet type - target2 := proto.Clone(&target1).(*querypb.Target) - target2.TabletType = topodatapb.TabletType_REPLICA - _, err = tsv.Execute(ctx, target2, "select * from test_table limit 1000", nil, 0, 0, nil) - want := "invalid tablet type" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - - // set expected target type to MASTER, but also accept REPLICA - tsv.SetServingType(topodatapb.TabletType_MASTER, true, []topodatapb.TabletType{topodatapb.TabletType_REPLICA}) - _, err = tsv.Execute(ctx, &target1, "select * from test_table limit 1000", nil, 0, 0, nil) - require.NoError(t, err) - _, err = tsv.Execute(ctx, target2, "select * from test_table limit 1000", nil, 0, 0, nil) - require.NoError(t, err) - - // wrong keyspace - target2 = proto.Clone(&target1).(*querypb.Target) - target2.Keyspace = "bad" - _, err = tsv.Execute(ctx, target2, "select * from test_table limit 1000", nil, 0, 0, nil) - want = "invalid keyspace bad" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - - // wrong shard - target2 = proto.Clone(&target1).(*querypb.Target) - target2.Shard = "bad" - _, err = tsv.Execute(ctx, target2, "select * from test_table limit 1000", nil, 0, 0, nil) - want = "invalid shard bad" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - - // no target - _, err = tsv.Execute(ctx, nil, "select * from test_table limit 1000", nil, 0, 0, nil) - want = "No target" - require.Error(t, err) - assert.Contains(t, err.Error(), want) - - // Disallow all if service is stopped. - tsv.StopService() - _, err = tsv.Execute(ctx, &target1, "select * from test_table limit 1000", nil, 0, 0, nil) - want = "operation not allowed in state NOT_SERVING" - require.Error(t, err) - assert.Contains(t, err.Error(), want) -} - -func TestBeginOnReplica(t *testing.T) { - db := setUpTabletServerTest(t) db.AddQueryPattern(".*", &sqltypes.Result{}) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target1 := querypb.Target{ - Keyspace: "test_keyspace", - Shard: "test_shard", - TabletType: topodatapb.TabletType_REPLICA, - } - err := tsv.StartService(target1, dbcfgs) + target := querypb.Target{TabletType: topodatapb.TabletType_REPLICA} + _, err := tsv.SetServingType(topodatapb.TabletType_REPLICA, true, nil) require.NoError(t, err) - defer tsv.StopService() - tsv.SetServingType(topodatapb.TabletType_REPLICA, true, nil) options := querypb.ExecuteOptions{ TransactionIsolation: querypb.ExecuteOptions_CONSISTENT_SNAPSHOT_READ_ONLY, } - txID, alias, err := tsv.Begin(ctx, &target1, &options) + txID, alias, err := tsv.Begin(ctx, &target, &options) require.NoError(t, err, "failed to create read only tx on replica") assert.Equal(t, tsv.alias, *alias, "Wrong tablet alias from Begin") - _, err = tsv.Rollback(ctx, &target1, txID) + _, err = tsv.Rollback(ctx, &target, txID) require.NoError(t, err, "failed to rollback read only tx") // test that we can still create transactions even in read-only mode options = querypb.ExecuteOptions{} - txID, _, err = tsv.Begin(ctx, &target1, &options) + txID, _, err = tsv.Begin(ctx, &target, &options) require.NoError(t, err, "expected write tx to be allowed") - _, err = tsv.Rollback(ctx, &target1, txID) + _, err = tsv.Rollback(ctx, &target, txID) require.NoError(t, err) } func TestTabletServerMasterToReplica(t *testing.T) { // Reuse code from tx_executor_test. _, tsv, db := newTestTxExecutor(t) + defer tsv.StopService() defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} txid1, _, err := tsv.Begin(ctx, &target, nil) @@ -514,8 +124,8 @@ func TestTabletServerMasterToReplica(t *testing.T) { func TestTabletServerRedoLogIsKeptBetweenRestarts(t *testing.T) { // Reuse code from tx_executor_test. _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() tsv.SetServingType(topodatapb.TabletType_REPLICA, true, nil) turnOnTxEngine := func() { @@ -597,8 +207,8 @@ func TestTabletServerRedoLogIsKeptBetweenRestarts(t *testing.T) { func TestTabletServerCreateTransaction(t *testing.T) { _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} db.AddQueryPattern(fmt.Sprintf("insert into _vt\\.dt_state\\(dtid, state, time_created\\) values \\('aa', %d,.*", int(querypb.TransactionState_PREPARE)), &sqltypes.Result{}) @@ -612,8 +222,8 @@ func TestTabletServerCreateTransaction(t *testing.T) { func TestTabletServerStartCommit(t *testing.T) { _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} commitTransition := fmt.Sprintf("update _vt.dt_state set state = %d where dtid = 'aa' and state = %d", int(querypb.TransactionState_COMMIT), int(querypb.TransactionState_PREPARE)) @@ -630,8 +240,8 @@ func TestTabletServerStartCommit(t *testing.T) { func TestTabletserverSetRollback(t *testing.T) { _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} rollbackTransition := fmt.Sprintf("update _vt.dt_state set state = %d where dtid = 'aa' and state = %d", int(querypb.TransactionState_ROLLBACK), int(querypb.TransactionState_PREPARE)) @@ -648,8 +258,8 @@ func TestTabletserverSetRollback(t *testing.T) { func TestTabletServerReadTransaction(t *testing.T) { _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} db.AddQuery("select dtid, state, time_created from _vt.dt_state where dtid = 'aa'", &sqltypes.Result{}) @@ -741,8 +351,8 @@ func TestTabletServerReadTransaction(t *testing.T) { func TestTabletServerConcludeTransaction(t *testing.T) { _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} db.AddQuery("delete from _vt.dt_state where dtid = 'aa'", &sqltypes.Result{}) @@ -752,27 +362,25 @@ func TestTabletServerConcludeTransaction(t *testing.T) { } func TestTabletServerBeginFail(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.TxPool.Size = 1 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} ctx, cancel := context.WithTimeout(context.Background(), 1*time.Nanosecond) defer cancel() tsv.Begin(ctx, &target, nil) - _, _, err = tsv.Begin(ctx, &target, nil) + _, _, err := tsv.Begin(ctx, &target, nil) require.EqualError(t, err, "transaction pool aborting request due to already expired context", "Begin err") } func TestTabletServerCommitTransaction(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - // sql that will be executed in this test + executeSQL := "select * from test_table limit 1000" executeSQLResult := &sqltypes.Result{ Fields: []*querypb.Field{ @@ -783,54 +391,34 @@ func TestTabletServerCommitTransaction(t *testing.T) { }, } db.AddQuery(executeSQL, executeSQLResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() transactionID, _, err := tsv.Begin(ctx, &target, nil) - if err != nil { - t.Fatalf("call TabletServer.Begin failed: %v", err) - } - if _, err := tsv.Execute(ctx, &target, executeSQL, nil, transactionID, 0, nil); err != nil { - t.Fatalf("failed to execute query: %s: %s", executeSQL, err) - } - if _, err := tsv.Commit(ctx, &target, transactionID); err != nil { - t.Fatalf("call TabletServer.Commit failed: %v", err) - } + require.NoError(t, err) + _, err = tsv.Execute(ctx, &target, executeSQL, nil, transactionID, 0, nil) + require.NoError(t, err) + _, err = tsv.Commit(ctx, &target, transactionID) + require.NoError(t, err) } -func TestTabletServerCommitRollbackFail(t *testing.T) { - db := setUpTabletServerTest(t) +func TestTabletServerCommiRollbacktFail(t *testing.T) { + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() - _, err = tsv.Commit(ctx, &target, -1) + _, err := tsv.Commit(ctx, &target, -1) want := "transaction -1: not found" - if err == nil || err.Error() != want { - t.Fatalf("Commit err: %v, want %v", err, want) - } + require.Equal(t, want, err.Error()) _, err = tsv.Rollback(ctx, &target, -1) - if err == nil || err.Error() != want { - t.Fatalf("Commit err: %v, want %v", err, want) - } + require.Equal(t, want, err.Error()) } func TestTabletServerRollback(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - // sql that will be executed in this test + executeSQL := "select * from test_table limit 1000" executeSQLResult := &sqltypes.Result{ Fields: []*querypb.Field{ @@ -841,32 +429,24 @@ func TestTabletServerRollback(t *testing.T) { }, } db.AddQuery(executeSQL, executeSQLResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() transactionID, _, err := tsv.Begin(ctx, &target, nil) + require.NoError(t, err) if err != nil { t.Fatalf("call TabletServer.Begin failed: %v", err) } - if _, err := tsv.Execute(ctx, &target, executeSQL, nil, transactionID, 0, nil); err != nil { - t.Fatalf("failed to execute query: %s: %v", executeSQL, err) - } - if _, err := tsv.Rollback(ctx, &target, transactionID); err != nil { - t.Fatalf("call TabletServer.Rollback failed: %v", err) - } + _, err = tsv.Execute(ctx, &target, executeSQL, nil, transactionID, 0, nil) + require.NoError(t, err) + _, err = tsv.Rollback(ctx, &target, transactionID) + require.NoError(t, err) } func TestTabletServerPrepare(t *testing.T) { // Reuse code from tx_executor_test. _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} transactionID, _, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) @@ -880,8 +460,8 @@ func TestTabletServerPrepare(t *testing.T) { func TestTabletServerCommitPrepared(t *testing.T) { // Reuse code from tx_executor_test. _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} transactionID, _, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) @@ -895,17 +475,12 @@ func TestTabletServerCommitPrepared(t *testing.T) { } func TestTabletServerReserveConnection(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() db.AddQueryPattern(".*", &sqltypes.Result{}) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() options := &querypb.ExecuteOptions{} // reserve a connection @@ -922,73 +497,53 @@ func TestTabletServerReserveConnection(t *testing.T) { } func TestTabletServerExecNonExistentConnection(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() db.AddQueryPattern(".*", &sqltypes.Result{}) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() options := &querypb.ExecuteOptions{} // run a query with a non-existent reserved id - _, err = tsv.Execute(ctx, &target, "select 42", nil, 0, 123456, options) + _, err := tsv.Execute(ctx, &target, "select 42", nil, 0, 123456, options) require.Error(t, err) } func TestTabletServerReleaseNonExistentConnection(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() db.AddQueryPattern(".*", &sqltypes.Result{}) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() // run a query with a non-existent reserved id - err = tsv.Release(ctx, &target, 0, 123456) + err := tsv.Release(ctx, &target, 0, 123456) require.Error(t, err) } func TestMakeSureToCloseDbConnWhenBeginQueryFails(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() db.AddRejectedQuery("begin", errors.New("it broke")) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() options := &querypb.ExecuteOptions{} // run a query with a non-existent reserved id - _, _, _, _, err = tsv.ReserveBeginExecute(ctx, &target, "select 42", []string{}, nil, options) + _, _, _, _, err := tsv.ReserveBeginExecute(ctx, &target, "select 42", []string{}, nil, options) require.Error(t, err) } func TestTabletServerReserveAndBeginCommit(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() db.AddQueryPattern(".*", &sqltypes.Result{}) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() options := &querypb.ExecuteOptions{} // reserve a connection and a transaction @@ -1038,8 +593,8 @@ func TestTabletServerReserveAndBeginCommit(t *testing.T) { func TestTabletServerRollbackPrepared(t *testing.T) { // Reuse code from tx_executor_test. _, tsv, db := newTestTxExecutor(t) - defer db.Close() defer tsv.StopService() + defer db.Close() target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} transactionID, _, err := tsv.Begin(ctx, &target, nil) require.NoError(t, err) @@ -1052,9 +607,10 @@ func TestTabletServerRollbackPrepared(t *testing.T) { } func TestTabletServerStreamExecute(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - // sql that will be executed in this test + executeSQL := "select * from test_table limit 1000" executeSQLResult := &sqltypes.Result{ Fields: []*querypb.Field{ @@ -1066,15 +622,7 @@ func TestTabletServerStreamExecute(t *testing.T) { } db.AddQuery(executeSQL, executeSQLResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() callback := func(*sqltypes.Result) error { return nil } if err := tsv.StreamExecute(ctx, &target, executeSQL, nil, 0, nil, callback); err != nil { t.Fatalf("TabletServer.StreamExecute should success: %s, but get error: %v", @@ -1083,9 +631,10 @@ func TestTabletServerStreamExecute(t *testing.T) { } func TestTabletServerStreamExecuteComments(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - // sql that will be executed in this test + executeSQL := "/* leading */ select * from test_table limit 1000 /* trailing */" executeSQLResult := &sqltypes.Result{ Fields: []*querypb.Field{ @@ -1097,15 +646,7 @@ func TestTabletServerStreamExecuteComments(t *testing.T) { } db.AddQuery(executeSQL, executeSQLResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() callback := func(*sqltypes.Result) error { return nil } ch := tabletenv.StatsLogger.Subscribe("test stats logging") @@ -1132,23 +673,17 @@ func TestTabletServerStreamExecuteComments(t *testing.T) { } } func TestTabletServerExecuteBatch(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() + sql := "insert into test_table values (1, 2, 'addr', 'name')" sqlResult := &sqltypes.Result{} expandedSQL := "insert into test_table(pk, name, addr, name_string) values (1, 2, 'addr', 'name') /* _stream test_table (pk ) (1 ); */" db.AddQuery(sql, sqlResult) db.AddQuery(expandedSQL, sqlResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() if _, err := tsv.ExecuteBatch(ctx, &target, []*querypb.BoundQuery{ { Sql: sql, @@ -1160,36 +695,22 @@ func TestTabletServerExecuteBatch(t *testing.T) { } func TestTabletServerExecuteBatchFailEmptyQueryList(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTest(t) defer tsv.StopService() - _, err = tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{}, false, 0, nil) + defer db.Close() + + _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{}, false, 0, nil) want := "Empty query list" require.Error(t, err) assert.Contains(t, err.Error(), want) } func TestTabletServerExecuteBatchFailAsTransaction(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTest(t) defer tsv.StopService() - _, err = tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ + defer db.Close() + + _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ { Sql: "begin", BindVariables: nil, @@ -1201,19 +722,12 @@ func TestTabletServerExecuteBatchFailAsTransaction(t *testing.T) { } func TestTabletServerExecuteBatchBeginFail(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() + // make "begin" query fail db.AddRejectedQuery("begin", errRejected) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() if _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ { Sql: "begin", @@ -1225,19 +739,12 @@ func TestTabletServerExecuteBatchBeginFail(t *testing.T) { } func TestTabletServerExecuteBatchCommitFail(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() + // make "commit" query fail db.AddRejectedQuery("commit", errRejected) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() if _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ { Sql: "begin", @@ -1253,8 +760,10 @@ func TestTabletServerExecuteBatchCommitFail(t *testing.T) { } func TestTabletServerExecuteBatchSqlExecFailInTransaction(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() + sql := "insert into test_table values (1, 2)" sqlResult := &sqltypes.Result{} expandedSQL := "insert into test_table values (1, 2) /* _stream test_table (pk ) (1 ); */" @@ -1266,15 +775,7 @@ func TestTabletServerExecuteBatchSqlExecFailInTransaction(t *testing.T) { db.AddRejectedQuery(sql, errRejected) db.AddRejectedQuery(expandedSQL, errRejected) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() if db.GetQueryCalledNum("rollback") != 0 { t.Fatalf("rollback should not be executed.") } @@ -1294,17 +795,10 @@ func TestTabletServerExecuteBatchSqlExecFailInTransaction(t *testing.T) { } func TestTabletServerExecuteBatchCallCommitWithoutABegin(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTest(t) defer tsv.StopService() + defer db.Close() + if _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ { Sql: "commit", @@ -1316,23 +810,16 @@ func TestTabletServerExecuteBatchCallCommitWithoutABegin(t *testing.T) { } func TestExecuteBatchNestedTransaction(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() + sql := "insert into test_table values (1, 2)" sqlResult := &sqltypes.Result{} expandedSQL := "insert into test_table values (1, 2) /* _stream test_table (pk ) (1 ); */" db.AddQuery(sql, sqlResult) db.AddQuery(expandedSQL, sqlResult) - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } - defer tsv.StopService() if _, err := tsv.ExecuteBatch(ctx, nil, []*querypb.BoundQuery{ { Sql: "begin", @@ -1368,20 +855,16 @@ func TestSerializeTransactionsSameRow(t *testing.T) { // The actual execution looks like this: // tx1 | tx3 // tx2 - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxConcurrency = 1 // Reduce the txpool to 2 because we should never consume more than two slots. config.TxPool.Size = 2 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -1478,19 +961,15 @@ func TestSerializeTransactionsSameRow(t *testing.T) { } func TestDMLQueryWithoutWhereClause(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxConcurrency = 1 - config.TxPool.Size = 2 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} q := "delete from test_table" db.AddQuery(q+" limit 10001", &sqltypes.Result{}) @@ -1517,20 +996,16 @@ func TestSerializeTransactionsSameRow_ExecuteBatchAsTransaction(t *testing.T) { // The actual execution looks like this: // tx1 | tx3 // tx2 - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxConcurrency = 1 // Reduce the txpool to 2 because we should never consume more than two slots. config.TxPool.Size = 2 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -1633,20 +1108,16 @@ func TestSerializeTransactionsSameRow_ConcurrentTransactions(t *testing.T) { // Out of these three, two can run in parallel because we increased the // ConcurrentTransactions limit to 2. // One out of the three transaction will always get serialized though. - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxConcurrency = 2 // Reduce the txpool to 2 because we should never consume more than two slots. config.TxPool.Size = 2 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -1772,19 +1243,15 @@ func TestSerializeTransactionsSameRow_TooManyPendingRequests(t *testing.T) { // serialized. // Since we start to queue before the transaction pool would queue, we need // to enforce an upper limit as well to protect vttablet. - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxQueueSize = 1 config.HotRowProtection.MaxConcurrency = 1 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -1859,19 +1326,15 @@ func TestSerializeTransactionsSameRow_TooManyPendingRequests(t *testing.T) { func TestSerializeTransactionsSameRow_TooManyPendingRequests_ExecuteBatchAsTransaction(t *testing.T) { // This test rejects queries if more than one transaction is currently in // progress for the hot row i.e. we check that tx2 actually fails. - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxQueueSize = 1 config.HotRowProtection.MaxConcurrency = 1 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -1950,18 +1413,14 @@ func TestSerializeTransactionsSameRow_RequestCanceled(t *testing.T) { // tx1 and tx2 run against the same row. // tx2 is blocked on tx1. Eventually, tx2 is canceled and its request fails. // Only after that tx1 commits and finishes. - db := setUpTabletServerTest(t) - defer db.Close() config := tabletenv.NewDefaultConfig() config.HotRowProtection.Mode = tabletenv.Enable config.HotRowProtection.MaxConcurrency = 1 - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - if err := tsv.StartService(target, dbcfgs); err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTestCustom(t, config) defer tsv.StopService() + defer db.Close() + + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} countStart := tsv.stats.WaitTimings.Counts()["TabletServerTest.TxSerializer"] // Fake data. @@ -2441,17 +1900,9 @@ func TestACLHUP(t *testing.T) { } func TestConfigChanges(t *testing.T) { - db := setUpTabletServerTest(t) - defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) - target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - if err != nil { - t.Fatalf("StartService failed: %v", err) - } + db, tsv := setupTabletServerTest(t) defer tsv.StopService() + defer db.Close() newSize := 10 newDuration := time.Duration(10 * time.Millisecond) @@ -2514,15 +1965,10 @@ func TestConfigChanges(t *testing.T) { } func TestReserveBeginExecute(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() _, transactionID, reservedID, _, err := tsv.ReserveBeginExecute(ctx, &target, "select 42", []string{"select 43"}, nil, &querypb.ExecuteOptions{}) require.NoError(t, err) @@ -2541,15 +1987,11 @@ func TestReserveBeginExecute(t *testing.T) { } func TestReserveExecute_WithoutTx(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() _, reservedID, _, err := tsv.ReserveExecute(ctx, &target, "select 42", []string{"select 43"}, nil, 0, &querypb.ExecuteOptions{}) require.NoError(t, err) @@ -2565,15 +2007,10 @@ func TestReserveExecute_WithoutTx(t *testing.T) { } func TestReserveExecute_WithTx(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() transactionID, _, err := tsv.Begin(ctx, &target, &querypb.ExecuteOptions{}) require.NoError(t, err) @@ -2627,17 +2064,13 @@ func TestRelease(t *testing.T) { name += " reserve" } t.Run(name, func(t *testing.T) { - db := setUpTabletServerTest(t) - db.AddQueryPattern(".*", &sqltypes.Result{}) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + db.AddQueryPattern(".*", &sqltypes.Result{}) target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() + var err error var transactionID, reservedID int64 switch { @@ -2668,15 +2101,11 @@ func TestRelease(t *testing.T) { } func TestReserveStats(t *testing.T) { - db := setUpTabletServerTest(t) + db, tsv := setupTabletServerTest(t) + defer tsv.StopService() defer db.Close() - config := tabletenv.NewDefaultConfig() - tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) - dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} - err := tsv.StartService(target, dbcfgs) - require.NoError(t, err) - defer tsv.StopService() callerID := &querypb.VTGateCallerID{ Username: "test", @@ -2725,7 +2154,23 @@ func TestReserveStats(t *testing.T) { assert.NotEmpty(t, tsv.te.txPool.env.Stats().UserReservedTimesNs.Counts()["test"]) } -func setUpTabletServerTest(t *testing.T) *fakesqldb.DB { +func setupTabletServerTest(t *testing.T) (*fakesqldb.DB, *TabletServer) { + config := tabletenv.NewDefaultConfig() + return setupTabletServerTestCustom(t, config) +} + +func setupTabletServerTestCustom(t *testing.T, config *tabletenv.TabletConfig) (*fakesqldb.DB, *TabletServer) { + db := setupFakeDB(t) + tsv := NewTabletServer("TabletServerTest", config, memorytopo.NewServer(""), topodatapb.TabletAlias{}) + require.Equal(t, StateNotConnected, tsv.sm.State()) + dbcfgs := newDBConfigs(db) + target := querypb.Target{TabletType: topodatapb.TabletType_MASTER} + err := tsv.StartService(target, dbcfgs) + require.NoError(t, err) + return db, tsv +} + +func setupFakeDB(t *testing.T) *fakesqldb.DB { db := fakesqldb.New(t) for query, result := range getSupportedQueries() { db.AddQuery(query, result) @@ -2733,15 +2178,6 @@ func setUpTabletServerTest(t *testing.T) *fakesqldb.DB { return db } -func checkTabletServerState(t *testing.T, tsv *TabletServer, expectState int64) { - tsv.mu.Lock() - state := tsv.state - tsv.mu.Unlock() - if state != expectState { - t.Fatalf("TabletServer should in state: %d, but get state: %d", expectState, state) - } -} - func getSupportedQueries() map[string]*sqltypes.Result { return map[string]*sqltypes.Result{ // Queries for how row protection test (txserializer). @@ -2759,7 +2195,6 @@ func getSupportedQueries() map[string]*sqltypes.Result { RowsAffected: 1, }, // queries for twopc - sqlTurnoffBinlog: {}, fmt.Sprintf(sqlCreateSidecarDB, "_vt"): {}, fmt.Sprintf(sqlDropLegacy1, "_vt"): {}, fmt.Sprintf(sqlDropLegacy2, "_vt"): {}, diff --git a/go/vt/vttablet/tabletserver/twopc.go b/go/vt/vttablet/tabletserver/twopc.go index d990a6344a5..af8cc6a70eb 100644 --- a/go/vt/vttablet/tabletserver/twopc.go +++ b/go/vt/vttablet/tabletserver/twopc.go @@ -40,7 +40,6 @@ import ( ) const ( - sqlTurnoffBinlog = "set @@session.sql_log_bin = 0" sqlCreateSidecarDB = "create database if not exists %s" sqlDropLegacy1 = "drop table if exists %s.redo_log_transaction" @@ -123,35 +122,8 @@ type TwoPC struct { // NewTwoPC creates a TwoPC variable. func NewTwoPC(readPool *connpool.Pool) *TwoPC { - return &TwoPC{readPool: readPool} -} - -// Init initializes TwoPC. If the metadata database or tables -// are not present, they are created. -func (tpc *TwoPC) Init(dbaparams dbconfigs.Connector) error { + tpc := &TwoPC{readPool: readPool} dbname := "_vt" - conn, err := dbconnpool.NewDBConnection(context.TODO(), dbaparams) - if err != nil { - return err - } - defer conn.Close() - statements := []string{ - sqlTurnoffBinlog, - fmt.Sprintf(sqlCreateSidecarDB, dbname), - fmt.Sprintf(sqlDropLegacy1, dbname), - fmt.Sprintf(sqlDropLegacy2, dbname), - fmt.Sprintf(sqlDropLegacy3, dbname), - fmt.Sprintf(sqlDropLegacy4, dbname), - fmt.Sprintf(sqlCreateTableRedoState, dbname), - fmt.Sprintf(sqlCreateTableRedoStatement, dbname), - fmt.Sprintf(sqlCreateTableDTState, dbname), - fmt.Sprintf(sqlCreateTableDTParticipant, dbname), - } - for _, s := range statements { - if _, err := conn.ExecuteFetch(s, 0, false); err != nil { - return err - } - } tpc.insertRedoTx = sqlparser.BuildParsedQuery( "insert into %s.redo_state(dtid, state, time_created) values (%a, %a, %a)", dbname, ":dtid", ":state", ":time_created") @@ -197,12 +169,35 @@ func (tpc *TwoPC) Init(dbaparams dbconfigs.Connector) error { "select dtid, time_created from %s.dt_state where time_created < %a", dbname, ":time_created") tpc.readAllTransactions = fmt.Sprintf(sqlReadAllTransactions, dbname, dbname) - return nil + return tpc } // Open starts the TwoPC service. -func (tpc *TwoPC) Open(dbconfigs *dbconfigs.DBConfigs) { +func (tpc *TwoPC) Open(dbconfigs *dbconfigs.DBConfigs) error { + dbname := "_vt" + conn, err := dbconnpool.NewDBConnection(context.TODO(), dbconfigs.DbaWithDB()) + if err != nil { + return err + } + defer conn.Close() + statements := []string{ + fmt.Sprintf(sqlCreateSidecarDB, dbname), + fmt.Sprintf(sqlDropLegacy1, dbname), + fmt.Sprintf(sqlDropLegacy2, dbname), + fmt.Sprintf(sqlDropLegacy3, dbname), + fmt.Sprintf(sqlDropLegacy4, dbname), + fmt.Sprintf(sqlCreateTableRedoState, dbname), + fmt.Sprintf(sqlCreateTableRedoStatement, dbname), + fmt.Sprintf(sqlCreateTableDTState, dbname), + fmt.Sprintf(sqlCreateTableDTParticipant, dbname), + } + for _, s := range statements { + if _, err := conn.ExecuteFetch(s, 0, false); err != nil { + return err + } + } tpc.readPool.Open(dbconfigs.AppWithDB(), dbconfigs.DbaWithDB(), dbconfigs.DbaWithDB()) + return nil } // Close closes the TwoPC service. diff --git a/go/vt/vttablet/tabletserver/tx_engine.go b/go/vt/vttablet/tabletserver/tx_engine.go index 428ef850408..ab778b438b5 100644 --- a/go/vt/vttablet/tabletserver/tx_engine.go +++ b/go/vt/vttablet/tabletserver/tx_engine.go @@ -337,15 +337,6 @@ func (te *TxEngine) transitionTo(nextState txEngineState) error { return nil } -// Init must be called once when vttablet starts for setting -// up the metadata tables. -func (te *TxEngine) Init() error { - if te.twopcEnabled { - return te.twoPC.Init(te.env.Config().DB.DbaWithDB()) - } - return nil -} - // open opens the TxEngine. If 2pc is enabled, it restores // all previously prepared transactions from the redo log. // this should only be called when the state is already locked @@ -353,11 +344,14 @@ func (te *TxEngine) open() { te.txPool.Open(te.env.Config().DB.AppWithDB(), te.env.Config().DB.DbaWithDB(), te.env.Config().DB.AppDebugWithDB()) if te.twopcEnabled && te.state == AcceptingReadAndWrite { - te.twoPC.Open(te.env.Config().DB) + // If there are errors, we choose to raise an alert and + // continue anyway. Serving traffic is considered more important + // than blocking everything for the sake of a few transactions. + if err := te.twoPC.Open(te.env.Config().DB); err != nil { + te.env.Stats().InternalErrors.Add("TwopcOpen", 1) + log.Errorf("Could not open TwoPC engine: %v", err) + } if err := te.prepareFromRedo(); err != nil { - // If this operation fails, we choose to raise an alert and - // continue anyway. Serving traffic is considered more important - // than blocking everything for the sake of a few transactions. te.env.Stats().InternalErrors.Add("TwopcResurrection", 1) log.Errorf("Could not prepare transactions: %v", err) } diff --git a/go/vt/vttablet/tabletserver/txthrottler/tx_throttler.go b/go/vt/vttablet/tabletserver/txthrottler/tx_throttler.go index 001435e92f5..868dc8c6c48 100644 --- a/go/vt/vttablet/tabletserver/txthrottler/tx_throttler.go +++ b/go/vt/vttablet/tabletserver/txthrottler/tx_throttler.go @@ -30,6 +30,7 @@ import ( "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" + querypb "vitess.io/vitess/go/vt/proto/query" throttlerdatapb "vitess.io/vitess/go/vt/proto/throttlerdata" topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) @@ -69,6 +70,8 @@ type TxThrottler struct { // state holds an open transaction throttler state. It is nil // if the TransactionThrottler is closed. state *txThrottlerState + + target querypb.Target } // NewTxThrottler tries to construct a TxThrottler from the @@ -91,6 +94,11 @@ func NewTxThrottler(config *tabletenv.TabletConfig, topoServer *topo.Server) *Tx return txThrottler } +// InitDBConfig initializes the target parameters for the throttler. +func (t *TxThrottler) InitDBConfig(target querypb.Target) { + t.target = target +} + func tryCreateTxThrottler(config *tabletenv.TabletConfig, topoServer *topo.Server) (*TxThrottler, error) { if !config.EnableTxThrottler { return newTxThrottler(&txThrottlerConfig{enabled: false}) @@ -210,15 +218,15 @@ func newTxThrottler(config *txThrottlerConfig) (*TxThrottler, error) { } // Open opens the transaction throttler. It must be called prior to 'Throttle'. -func (t *TxThrottler) Open(keyspace, shard string) error { +func (t *TxThrottler) Open() error { if !t.config.enabled { return nil } if t.state != nil { - return fmt.Errorf("transaction throttler already opened") + return nil } var err error - t.state, err = newTxThrottlerState(t.config, keyspace, shard) + t.state, err = newTxThrottlerState(t.config, t.target.Keyspace, t.target.Shard) return err } diff --git a/go/vt/vttablet/tabletserver/txthrottler/tx_throttler_test.go b/go/vt/vttablet/tabletserver/txthrottler/tx_throttler_test.go index 6fea2c65379..e0b3b6d85a7 100644 --- a/go/vt/vttablet/tabletserver/txthrottler/tx_throttler_test.go +++ b/go/vt/vttablet/tabletserver/txthrottler/tx_throttler_test.go @@ -39,7 +39,11 @@ func TestDisabledThrottler(t *testing.T) { config := tabletenv.NewDefaultConfig() config.EnableTxThrottler = false throttler := NewTxThrottler(config, nil) - if err := throttler.Open("keyspace", "shard"); err != nil { + throttler.InitDBConfig(querypb.Target{ + Keyspace: "keyspace", + Shard: "shard", + }) + if err := throttler.Open(); err != nil { t.Fatalf("want: nil, got: %v", err) } if result := throttler.Throttle(); result != false { @@ -117,7 +121,11 @@ func TestEnabledThrottler(t *testing.T) { if err != nil { t.Fatalf("want: nil, got: %v", err) } - if err := throttler.Open("keyspace", "shard"); err != nil { + throttler.InitDBConfig(querypb.Target{ + Keyspace: "keyspace", + Shard: "shard", + }) + if err := throttler.Open(); err != nil { t.Fatalf("want: nil, got: %v", err) } if result := throttler.Throttle(); result != false { diff --git a/go/vt/vttablet/tabletserver/vstreamer/engine.go b/go/vt/vttablet/tabletserver/vstreamer/engine.go index c8d3455827f..6cf1eeb7391 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/engine.go +++ b/go/vt/vttablet/tabletserver/vstreamer/engine.go @@ -40,19 +40,24 @@ import ( // Engine is the engine for handling vreplication streaming requests. type Engine struct { - env tabletenv.Env + env tabletenv.Env + ts srvtopo.Server + se *schema.Engine + cell string - // mu protects isOpen, streamers, streamIdx and vschema. - mu sync.Mutex + // keyspace is initialized by InitDBConfig + keyspace string - isOpen bool // wg is incremented for every Stream, and decremented on end. // Close waits for all current streams to end by waiting on wg. - wg sync.WaitGroup + wg sync.WaitGroup + + mu sync.Mutex + isOpen bool + streamIdx int streamers map[int]*uvstreamer rowStreamers map[int]*rowStreamer resultStreamers map[int]*resultStreamer - streamIdx int // watcherOnce is used for initializing vschema // and setting up the vschema watch. It's guaranteed that @@ -61,12 +66,7 @@ type Engine struct { watcherOnce sync.Once lvschema *localVSchema - // The following members are initialized once at the beginning. - ts srvtopo.Server - se *schema.Engine - keyspace string - cell string - + // stats variables vschemaErrors *stats.Counter vschemaUpdates *stats.Counter } @@ -74,33 +74,40 @@ type Engine struct { // NewEngine creates a new Engine. // Initialization sequence is: NewEngine->InitDBConfig->Open. // Open and Close can be called multiple times and are idempotent. -func NewEngine(env tabletenv.Env, ts srvtopo.Server, se *schema.Engine) *Engine { +func NewEngine(env tabletenv.Env, ts srvtopo.Server, se *schema.Engine, cell string) *Engine { vse := &Engine{ - env: env, + env: env, + ts: ts, + se: se, + cell: cell, + streamers: make(map[int]*uvstreamer), rowStreamers: make(map[int]*rowStreamer), resultStreamers: make(map[int]*resultStreamer), - lvschema: &localVSchema{vschema: &vindexes.VSchema{}}, - ts: ts, - se: se, - vschemaErrors: env.Exporter().NewCounter("VSchemaErrors", "Count of VSchema errors"), - vschemaUpdates: env.Exporter().NewCounter("VSchemaUpdates", "Count of VSchema updates. Does not include errors"), + + lvschema: &localVSchema{vschema: &vindexes.VSchema{}}, + + vschemaErrors: env.Exporter().NewCounter("VSchemaErrors", "Count of VSchema errors"), + vschemaUpdates: env.Exporter().NewCounter("VSchemaUpdates", "Count of VSchema updates. Does not include errors"), } env.Exporter().HandleFunc("/debug/tablet_vschema", vse.ServeHTTP) return vse } +// InitDBConfig initializes the target parameters for the Engine. +func (vse *Engine) InitDBConfig(keyspace string) { + vse.keyspace = keyspace +} + // Open starts the Engine service. -func (vse *Engine) Open(keyspace, cell string) error { +func (vse *Engine) Open() { vse.mu.Lock() defer vse.mu.Unlock() if vse.isOpen { - return nil + return } + log.Info("VStreamer is open.") vse.isOpen = true - vse.keyspace = keyspace - vse.cell = cell - return nil } // IsOpen checks if the engine is opened @@ -134,6 +141,7 @@ func (vse *Engine) Close() { // Wait only after releasing the lock because the end of every // stream will use the lock to remove the entry from streamers. vse.wg.Wait() + log.Info("VStreamer is closed.") } func (vse *Engine) vschema() *vindexes.VSchema { diff --git a/go/vt/vttablet/tabletserver/vstreamer/main_test.go b/go/vt/vttablet/tabletserver/vstreamer/main_test.go index c5b0f6ac0e0..37c178d8765 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/main_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/main_test.go @@ -52,8 +52,9 @@ func TestMain(m *testing.M) { // engine cannot be initialized in testenv because it introduces // circular dependencies - engine = NewEngine(env.TabletEnv, env.SrvTopo, env.SchemaEngine) - engine.Open(env.KeyspaceName, env.Cells[0]) + engine = NewEngine(env.TabletEnv, env.SrvTopo, env.SchemaEngine, env.Cells[0]) + engine.InitDBConfig(env.KeyspaceName) + engine.Open() defer engine.Close() return m.Run() @@ -68,7 +69,8 @@ func customEngine(t *testing.T, modifier func(mysql.ConnParams) mysql.ConnParams config := env.TabletEnv.Config().Clone() config.DB = dbconfigs.NewTestDBConfigs(modified, modified, modified.DbName) - engine := NewEngine(tabletenv.NewEnv(config, "VStreamerTest"), env.SrvTopo, env.SchemaEngine) - engine.Open(env.KeyspaceName, env.Cells[0]) + engine := NewEngine(tabletenv.NewEnv(config, "VStreamerTest"), env.SrvTopo, env.SchemaEngine, env.Cells[0]) + engine.InitDBConfig(env.KeyspaceName) + engine.Open() return engine } diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go index d7f13005720..445eff89464 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go @@ -424,6 +424,10 @@ func (vs *vstreamer) parseEvent(ev mysql.BinlogEvent) ([]*binlogdatapb.VEvent, e Type: binlogdatapb.VEventType_DDL, Ddl: q.SQL, }) + // Reload schema only if the DDL change is relevant. + // TODO(sougou): move this back to always load after + // the schema reload bug is fixed. + vs.se.ReloadAt(context.Background(), vs.pos) } else { // If the DDL need not be sent, send a dummy OTHER event. vevents = append(vevents, &binlogdatapb.VEvent{ @@ -433,7 +437,6 @@ func (vs *vstreamer) parseEvent(ev mysql.BinlogEvent) ([]*binlogdatapb.VEvent, e Type: binlogdatapb.VEventType_OTHER, }) } - vs.se.ReloadAt(context.Background(), vs.pos) case sqlparser.StmtOther, sqlparser.StmtPriv: // These are either: // 1) DBA statements like REPAIR that can be ignored. diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go index ab8431d2748..60f093c44f8 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go @@ -57,8 +57,9 @@ func TestVersion(t *testing.T) { require.NoError(t, err) defer env.SchemaEngine.EnableHistorian(false) - engine = NewEngine(engine.env, env.SrvTopo, env.SchemaEngine) - engine.Open(env.KeyspaceName, env.Cells[0]) + engine = NewEngine(engine.env, env.SrvTopo, env.SchemaEngine, env.Cells[0]) + engine.InitDBConfig(env.KeyspaceName) + engine.Open() defer engine.Close() execStatements(t, []string{ diff --git a/go/vt/withddl/withddl.go b/go/vt/withddl/withddl.go index 95fb3fd3aaf..c00afc35e68 100644 --- a/go/vt/withddl/withddl.go +++ b/go/vt/withddl/withddl.go @@ -26,6 +26,7 @@ import ( "vitess.io/vitess/go/mysql" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/sqlparser" ) // WithDDL allows you to execute statements against @@ -64,7 +65,7 @@ func (wd *WithDDL) Exec(ctx context.Context, query string, f interface{}) (*sqlt return nil, err } - log.Infof("Updating schema for %v and retrying: %v", query, err) + log.Infof("Updating schema for %v and retrying: %v", sqlparser.TruncateForUI(err.Error()), err) for _, query := range wd.ddls { _, merr := exec(query) if merr == nil {