diff --git a/go/cmd/vtctld/schema.go b/go/cmd/vtctld/schema.go index ddf178842c2..df93cbc393c 100644 --- a/go/cmd/vtctld/schema.go +++ b/go/cmd/vtctld/schema.go @@ -36,7 +36,9 @@ var ( schemaChangeController = flag.String("schema_change_controller", "", "schema change controller is responsible for finding schema changes and responding to schema change events") schemaChangeCheckInterval = flag.Int("schema_change_check_interval", 60, "this value decides how often we check schema change dir, in seconds") schemaChangeUser = flag.String("schema_change_user", "", "The user who submits this schema change.") - schemaChangeSlaveTimeout = flag.Duration("schema_change_slave_timeout", 10*time.Second, "how long to wait for replicas to receive the schema change") + // for backwards compatibility + deprecatedTimeout = flag.Duration("schema_change_slave_timeout", wrangler.DefaultWaitReplicasTimeout, "DEPRECATED -- use -schema_change_replicas_timeout instead") + schemaChangeReplicasTimeout = flag.Duration("schema_change_replicas_timeout", wrangler.DefaultWaitReplicasTimeout, "how long to wait for replicas to receive the schema change") ) func initSchema() { @@ -46,6 +48,9 @@ func initSchema() { if *schemaChangeCheckInterval > 0 { interval = *schemaChangeCheckInterval } + if *deprecatedTimeout != wrangler.DefaultWaitReplicasTimeout { + *schemaChangeReplicasTimeout = *deprecatedTimeout + } timer := timer.NewTimer(time.Duration(interval) * time.Second) controllerFactory, err := schemamanager.GetControllerFactory(*schemaChangeController) @@ -67,7 +72,7 @@ func initSchema() { err = schemamanager.Run( ctx, controller, - schemamanager.NewTabletExecutor(wr, *schemaChangeSlaveTimeout), + schemamanager.NewTabletExecutor(wr, *schemaChangeReplicasTimeout), ) if err != nil { log.Errorf("Schema change failed, error: %v", err) diff --git a/go/test/endtoend/cluster/vtctlclient_process.go b/go/test/endtoend/cluster/vtctlclient_process.go index edaa2fe9a52..57fbdebda7c 100644 --- 
a/go/test/endtoend/cluster/vtctlclient_process.go +++ b/go/test/endtoend/cluster/vtctlclient_process.go @@ -38,7 +38,7 @@ type VtctlClientProcess struct { func (vtctlclient *VtctlClientProcess) InitShardMaster(Keyspace string, Shard string, Cell string, TabletUID int) (err error) { return vtctlclient.ExecuteCommand( "InitShardMaster", - "-force", + "-force", "-wait_replicas_timeout", "31s", fmt.Sprintf("%s/%s", Keyspace, Shard), fmt.Sprintf("%s-%d", Cell, TabletUID)) } diff --git a/go/test/endtoend/reparent/reparent_test.go b/go/test/endtoend/reparent/reparent_test.go index c1c9817753a..d463e102e0e 100644 --- a/go/test/endtoend/reparent/reparent_test.go +++ b/go/test/endtoend/reparent/reparent_test.go @@ -121,7 +121,8 @@ func TestReparentDownMaster(t *testing.T) { err = clusterInstance.VtctlclientProcess.ExecuteCommand( "EmergencyReparentShard", "-keyspace_shard", keyspaceShard, - "-new_master", tablet62044.Alias) + "-new_master", tablet62044.Alias, + "-wait_replicas_timeout", "31s") require.Nil(t, err) validateTopology(t, false) @@ -324,7 +325,9 @@ func TestReparentReplicaOffline(t *testing.T) { out, err := clusterInstance.VtctlclientProcess.ExecuteCommandWithOutput( "PlannedReparentShard", "-keyspace_shard", keyspaceShard, - "-new_master", tablet62044.Alias) + "-new_master", tablet62044.Alias, + "-wait_replicas_timeout", "31s") + require.Error(t, err) assert.Contains(t, out, "tablet zone2-0000031981 SetMaster failed") @@ -635,9 +638,10 @@ func TestChangeTypeSemiSync(t *testing.T) { } // Updated rdonly tablet and set tablet type to rdonly + // TODO: replace with ChangeTabletType once ChangeSlaveType is removed err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonly1.Alias, "rdonly") require.Nil(t, err) - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonly2.Alias, "rdonly") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly2.Alias, "rdonly") require.Nil(t, err) 
validateTopology(t, true) @@ -646,7 +650,7 @@ func TestChangeTypeSemiSync(t *testing.T) { // Stop replication on rdonly1, to make sure when we make it replica it doesn't start again. // Note we do a similar test for replica -> rdonly below. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopSlave", rdonly1.Alias) + err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", rdonly1.Alias) require.Nil(t, err) // Check semi-sync on replicas. @@ -661,27 +665,27 @@ func TestChangeTypeSemiSync(t *testing.T) { checkDBstatus(ctx, t, rdonly2, "Rpl_semi_sync_slave_status", "OFF") // Change replica to rdonly while replicating, should turn off semi-sync, and restart replication. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", replica.Alias, "rdonly") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replica.Alias, "rdonly") require.Nil(t, err) checkDBvar(ctx, t, replica, "rpl_semi_sync_slave_enabled", "OFF") checkDBstatus(ctx, t, replica, "Rpl_semi_sync_slave_status", "OFF") // Change rdonly1 to replica, should turn on semi-sync, and not start replication. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonly1.Alias, "replica") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly1.Alias, "replica") require.Nil(t, err) checkDBvar(ctx, t, rdonly1, "rpl_semi_sync_slave_enabled", "ON") checkDBstatus(ctx, t, rdonly1, "Rpl_semi_sync_slave_status", "OFF") checkReplicaStatus(ctx, t, rdonly1) // Now change from replica back to rdonly, make sure replication is still not enabled. 
- err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonly1.Alias, "rdonly") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly1.Alias, "rdonly") require.Nil(t, err) checkDBvar(ctx, t, rdonly1, "rpl_semi_sync_slave_enabled", "OFF") checkDBstatus(ctx, t, rdonly1, "Rpl_semi_sync_slave_status", "OFF") checkReplicaStatus(ctx, t, rdonly1) // Change rdonly2 to replica, should turn on semi-sync, and restart replication. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonly2.Alias, "replica") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly2.Alias, "replica") require.Nil(t, err) checkDBvar(ctx, t, rdonly2, "rpl_semi_sync_slave_enabled", "ON") checkDBstatus(ctx, t, rdonly2, "Rpl_semi_sync_slave_status", "ON") diff --git a/go/test/endtoend/tabletmanager/tablet_health_test.go b/go/test/endtoend/tabletmanager/tablet_health_test.go index 29ef62fab91..0fad0dbc81a 100644 --- a/go/test/endtoend/tabletmanager/tablet_health_test.go +++ b/go/test/endtoend/tabletmanager/tablet_health_test.go @@ -130,6 +130,7 @@ func TestHealthCheck(t *testing.T) { exec(t, masterConn, "stop slave") // stop replication, make sure we don't go unhealthy. 
+ // TODO: replace with StopReplication once StopSlave has been removed err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopSlave", rTablet.Alias) require.NoError(t, err) err = clusterInstance.VtctlclientProcess.ExecuteCommand("RunHealthCheck", rTablet.Alias) @@ -141,7 +142,7 @@ func TestHealthCheck(t *testing.T) { verifyStreamHealth(t, result) // then restart replication, make sure we stay healthy - err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopSlave", rTablet.Alias) + err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", rTablet.Alias) require.NoError(t, err) err = clusterInstance.VtctlclientProcess.ExecuteCommand("RunHealthCheck", rTablet.Alias) require.NoError(t, err) @@ -220,13 +221,13 @@ func TestHealthCheckDrainedStateDoesNotShutdownQueryService(t *testing.T) { // actions are similar to the SplitClone vtworker command // implementation.) The tablet will stay healthy, and the // query service is still running. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonlyTablet.Alias, "drained") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonlyTablet.Alias, "drained") require.NoError(t, err) // Trying to drain the same tablet again, should error - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonlyTablet.Alias, "drained") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonlyTablet.Alias, "drained") assert.Error(t, err, "already drained") - err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopSlave", rdonlyTablet.Alias) + err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", rdonlyTablet.Alias) require.NoError(t, err) // Trigger healthcheck explicitly to avoid waiting for the next interval. 
err = clusterInstance.VtctlclientProcess.ExecuteCommand("RunHealthCheck", rdonlyTablet.Alias) @@ -239,7 +240,7 @@ func TestHealthCheckDrainedStateDoesNotShutdownQueryService(t *testing.T) { require.NoError(t, err) // Restart replication. Tablet will become healthy again. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeSlaveType", rdonlyTablet.Alias, "rdonly") + err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonlyTablet.Alias, "rdonly") require.NoError(t, err) err = clusterInstance.VtctlclientProcess.ExecuteCommand("StartSlave", rdonlyTablet.Alias) require.NoError(t, err) @@ -382,9 +383,9 @@ func TestNoMysqlHealthCheck(t *testing.T) { checkHealth(t, rTablet.HTTPPort, true) // Tell replica to not try to repair replication in healthcheck. - // The StopSlave will ultimately fail because mysqld is not running, + // The StopReplication will ultimately fail because mysqld is not running, // But vttablet should remember that it's not supposed to fix replication. - err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopSlave", rTablet.Alias) + err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", rTablet.Alias) assert.Error(t, err, "Fail as mysqld not running") //The above notice to not fix replication should survive tablet restart. diff --git a/go/vt/vtctl/reparent.go b/go/vt/vtctl/reparent.go index af8a79e984c..dc56e9b898a 100644 --- a/go/vt/vtctl/reparent.go +++ b/go/vt/vtctl/reparent.go @@ -19,7 +19,6 @@ package vtctl import ( "flag" "fmt" - "time" "golang.org/x/net/context" "vitess.io/vitess/go/vt/topo" @@ -40,17 +39,17 @@ func init() { addCommand("Shards", command{ "InitShardMaster", commandInitShardMaster, - "[-force] [-wait_slave_timeout=] ", + "[-force] [-wait_replicas_timeout=] ", "Sets the initial master for a shard. Will make all other tablets in the shard replicas of the provided master. WARNING: this could cause data loss on an already replicating shard. 
PlannedReparentShard or EmergencyReparentShard should be used instead."}) addCommand("Shards", command{ "PlannedReparentShard", commandPlannedReparentShard, - "-keyspace_shard= [-new_master=] [-avoid_master=] [-wait_slave_timeout=]", + "-keyspace_shard= [-new_master=] [-avoid_master=] [-wait_replicas_timeout=]", "Reparents the shard to the new master, or away from old master. Both old and new master need to be up and running."}) addCommand("Shards", command{ "EmergencyReparentShard", commandEmergencyReparentShard, - "-keyspace_shard= -new_master=", + "-keyspace_shard= -new_master= [-wait_replicas_timeout=]", "Reparents the shard to the new master. Assumes the old master is dead and not responsding."}) addCommand("Shards", command{ "TabletExternallyReparented", @@ -84,7 +83,12 @@ func commandInitShardMaster(ctx context.Context, wr *wrangler.Wrangler, subFlags } force := subFlags.Bool("force", false, "will force the reparent even if the provided tablet is not a master or the shard master") - waitReplicasTimeout := subFlags.Duration("wait_slave_timeout", 30*time.Second, "time to wait for replicas to catch up in reparenting") + // For backwards compatibility the deprecated -wait_slave_timeout stays usable as an + // alias of -wait_replicas_timeout: both flags write to the same destination, so the + // flag that appears later on the command line wins. The alias must be wired up before + // subFlags.Parse runs; comparing the value against its default before Parse never fires. + waitReplicasTimeout := subFlags.Duration("wait_replicas_timeout", *topo.RemoteOperationTimeout, "time to wait for replicas to catch up in reparenting") + subFlags.DurationVar(waitReplicasTimeout, "wait_slave_timeout", *topo.RemoteOperationTimeout, "DEPRECATED -- use -wait_replicas_timeout") if err := subFlags.Parse(args); err != nil { return err } @@ -107,7 +111,12 @@ func commandPlannedReparentShard(ctx context.Context, wr *wrangler.Wrangler, sub return fmt.Errorf("active reparent commands disabled (unset the -disable_active_reparents flag to enable)") } - waitReplicasTimeout := subFlags.Duration("wait_slave_timeout", *topo.RemoteOperationTimeout, "time to wait for replicas to catch up on replication before and 
after reparenting") + // For backwards compatibility -wait_slave_timeout is kept as a deprecated alias of + // -wait_replicas_timeout: both flags share one destination and the flag that appears + // later on the command line wins. Registration happens before subFlags.Parse, so + // passing only the deprecated flag still takes effect. + waitReplicasTimeout := subFlags.Duration("wait_replicas_timeout", *topo.RemoteOperationTimeout, "time to wait for replicas to catch up on replication before and after reparenting") + subFlags.DurationVar(waitReplicasTimeout, "wait_slave_timeout", *topo.RemoteOperationTimeout, "DEPRECATED -- use -wait_replicas_timeout") keyspaceShard := subFlags.String("keyspace_shard", "", "keyspace/shard of the shard that needs to be reparented") newMaster := subFlags.String("new_master", "", "alias of a tablet that should be the new master") avoidMaster := subFlags.String("avoid_master", "", "alias of a tablet that should not be the master, i.e. reparent to any other tablet if this one is the master") @@ -150,7 +159,12 @@ func commandEmergencyReparentShard(ctx context.Context, wr *wrangler.Wrangler, s return fmt.Errorf("active reparent commands disabled (unset the -disable_active_reparents flag to enable)") } - waitReplicasTimeout := subFlags.Duration("wait_slave_timeout", 30*time.Second, "time to wait for replicas to catch up in reparenting") + // For backwards compatibility -wait_slave_timeout is kept as a deprecated alias of + // -wait_replicas_timeout: both flags share one destination and the flag that appears + // later on the command line wins. Registration happens before subFlags.Parse, so + // passing only the deprecated flag still takes effect. + waitReplicasTimeout := subFlags.Duration("wait_replicas_timeout", *topo.RemoteOperationTimeout, "time to wait for replicas to catch up in reparenting") + subFlags.DurationVar(waitReplicasTimeout, "wait_slave_timeout", *topo.RemoteOperationTimeout, "DEPRECATED -- use -wait_replicas_timeout") keyspaceShard := subFlags.String("keyspace_shard", "", "keyspace/shard of the shard that needs to be reparented") newMaster := subFlags.String("new_master", "", "alias of a tablet that should be the new master") if err := subFlags.Parse(args); err != nil { return err } diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index eb103f4db9d..40331f5d08f 100644 --- 
a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -173,12 +173,22 @@ var commands = []commandGroup{ "", "Sets the tablet as read-write."}, {"StartSlave", commandStartReplication, - "", + "DEPRECATED -- Use StartReplication ", + "Starts replication on the specified tablet."}, + {"StartReplication", commandStartReplication, + "", "Starts replication on the specified tablet."}, {"StopSlave", commandStopReplication, + "DEPRECATED -- Use StopReplication ", + "Stops replication on the specified tablet."}, + {"StopReplication", commandStopReplication, "", "Stops replication on the specified tablet."}, {"ChangeSlaveType", commandChangeTabletType, + "DEPRECATED -- Use ChangeTabletType [-dry-run] ", + "Changes the db type for the specified tablet, if possible. This command is used primarily to arrange replicas, and it will not convert a master.\n" + + "NOTE: This command automatically updates the serving graph.\n"}, + {"ChangeTabletType", commandChangeTabletType, "[-dry-run] ", "Changes the db type for the specified tablet, if possible. This command is used primarily to arrange replicas, and it will not convert a master.\n" + "NOTE: This command automatically updates the serving graph.\n"}, @@ -391,10 +401,10 @@ var commands = []commandGroup{ "[-exclude_tables=''] [-include-views] [-skip-no-master] ", "Validates that the master schema from shard 0 matches the schema on all of the other tablets in the keyspace."}, {"ApplySchema", commandApplySchema, - "[-allow_long_unavailability] [-wait_slave_timeout=10s] {-sql= || -sql-file=} ", + "[-allow_long_unavailability] [-wait_replicas_timeout=10s] {-sql= || -sql-file=} ", "Applies the schema change to the specified keyspace on every master, running in parallel on all shards. The changes are then propagated to replicas via replication. 
If -allow_long_unavailability is set, schema changes affecting a large number of rows (and possibly incurring a longer period of unavailability) will not be rejected."}, {"CopySchemaShard", commandCopySchemaShard, - "[-tables=,,...] [-exclude_tables=,,...] [-include-views] [-skip-verify] [-wait_slave_timeout=10s] { || } ", + "[-tables=,,...] [-exclude_tables=,,...] [-include-views] [-skip-verify] [-wait_replicas_timeout=10s] { || } ", "Copies the schema from a source shard's master (or a specific tablet) to a destination shard. The schema is applied directly on the master of the destination shard, and it is propagated to the replicas through binlogs."}, {"ValidateVersionShard", commandValidateVersionShard, @@ -887,7 +897,7 @@ func commandStartReplication(ctx context.Context, wr *wrangler.Wrangler, subFlag return err } if subFlags.NArg() != 1 { - return fmt.Errorf("action StartSlave requires ") + return fmt.Errorf("action StartReplication requires ") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) @@ -906,7 +916,7 @@ func commandStopReplication(ctx context.Context, wr *wrangler.Wrangler, subFlags return err } if subFlags.NArg() != 1 { - return fmt.Errorf("action StopSlave requires ") + return fmt.Errorf("action StopReplication requires ") } tabletAlias, err := topoproto.ParseTabletAlias(subFlags.Arg(0)) @@ -2364,7 +2374,12 @@ func commandApplySchema(ctx context.Context, wr *wrangler.Wrangler, subFlags *fl allowLongUnavailability := subFlags.Bool("allow_long_unavailability", false, "Allow large schema changes which incur a longer unavailability of the database.") sql := subFlags.String("sql", "", "A list of semicolon-delimited SQL commands") sqlFile := subFlags.String("sql-file", "", "Identifies the file that contains the SQL commands") - waitReplicasTimeout := subFlags.Duration("wait_slave_timeout", wrangler.DefaultWaitReplicasTimeout, "The amount of time to wait for replicas to receive the schema change via replication.") + // For backwards 
compatibility -wait_slave_timeout is kept as a deprecated alias of + // -wait_replicas_timeout: both flags share one destination and the flag that appears + // later on the command line wins. Registration happens before subFlags.Parse, so + // passing only the deprecated flag still takes effect. + waitReplicasTimeout := subFlags.Duration("wait_replicas_timeout", wrangler.DefaultWaitReplicasTimeout, "The amount of time to wait for replicas to receive the schema change via replication.") + subFlags.DurationVar(waitReplicasTimeout, "wait_slave_timeout", wrangler.DefaultWaitReplicasTimeout, "DEPRECATED -- use -wait_replicas_timeout") if err := subFlags.Parse(args); err != nil { return err } @@ -2394,7 +2409,12 @@ func commandCopySchemaShard(ctx context.Context, wr *wrangler.Wrangler, subFlags excludeTables := subFlags.String("exclude_tables", "", "Specifies a comma-separated list of tables to exclude. Each is either an exact match, or a regular expression of the form /regexp/") includeViews := subFlags.Bool("include-views", true, "Includes views in the output") skipVerify := subFlags.Bool("skip-verify", false, "Skip verification of source and target schema after copy") - waitReplicasTimeout := subFlags.Duration("wait_slave_timeout", 10*time.Second, "The amount of time to wait for replicas to receive the schema change via replication.") + // For backwards compatibility -wait_slave_timeout is kept as a deprecated alias of + // -wait_replicas_timeout: both flags share one destination and the flag that appears + // later on the command line wins. Registration happens before subFlags.Parse, so + // passing only the deprecated flag still takes effect. + waitReplicasTimeout := subFlags.Duration("wait_replicas_timeout", wrangler.DefaultWaitReplicasTimeout, "The amount of time to wait for replicas to receive the schema change via replication.") + subFlags.DurationVar(waitReplicasTimeout, "wait_slave_timeout", wrangler.DefaultWaitReplicasTimeout, "DEPRECATED -- use -wait_replicas_timeout") if err := subFlags.Parse(args); err != nil { return err } diff --git a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go index 3bc4025875e..c24c1dbe6eb 100644 --- a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go +++ 
b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go @@ -122,6 +122,7 @@ func TestEmergencyReparentShard(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run EmergencyReparentShard + // using deprecated flag until it is removed completely. at that time this should be replaced with -wait_replicas_timeout err := vp.Run([]string{"EmergencyReparentShard", "-wait_slave_timeout", "10s", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, topoproto.TabletAliasString(newMaster.Tablet.Alias)}) require.NoError(t, err) diff --git a/go/vt/wrangler/testlib/init_shard_master_test.go b/go/vt/wrangler/testlib/init_shard_master_test.go index 632532e624e..ccab2d8bb42 100644 --- a/go/vt/wrangler/testlib/init_shard_master_test.go +++ b/go/vt/wrangler/testlib/init_shard_master_test.go @@ -128,6 +128,7 @@ func TestInitMasterShard(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run InitShardMaster + // using deprecated flag until it is removed completely. at that time this should be replaced with -wait_replicas_timeout if err := vp.Run([]string{"InitShardMaster", "-wait_slave_timeout", "10s", master.Tablet.Keyspace + "/" + master.Tablet.Shard, topoproto.TabletAliasString(master.Tablet.Alias)}); err != nil { t.Fatalf("InitShardMaster failed: %v", err) } diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go index 1d2de79791a..9ddaef4f38a 100644 --- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go @@ -111,6 +111,7 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard + // using deprecated flag until it is removed completely. 
at that time this should be replaced with -wait_replicas_timeout err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard}) require.NoError(t, err) @@ -224,7 +225,7 @@ func TestPlannedReparentShardNoError(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) require.NoError(t, err) @@ -266,7 +267,7 @@ func TestPlannedReparentNoMaster(t *testing.T) { NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", replica1.Tablet.Keyspace + "/" + replica1.Tablet.Shard, "-new_master", topoproto.TabletAliasString(replica1.Tablet.Alias)}) + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", replica1.Tablet.Keyspace + "/" + replica1.Tablet.Shard, "-new_master", topoproto.TabletAliasString(replica1.Tablet.Alias)}) assert.Error(t, err) assert.Contains(t, err.Error(), "the shard has no master") } @@ -356,7 +357,7 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", 
"-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) assert.Error(t, err) assert.Contains(t, err.Error(), "replication on master-elect cell1-0000000001 did not catch up in time") @@ -450,7 +451,7 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) assert.Error(t, err) assert.Contains(t, err.Error(), "replication on master-elect cell1-0000000001 did not catch up in time") @@ -506,7 +507,7 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", topoproto.TabletAliasString(master.Tablet.Alias)}) require.NoError(t, err) // check what was run @@ -524,7 +525,11 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") } -func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { +// TestPlannedReparentShardRelayLogErrorStartReplication is similar to +// TestPlannedReparentShardRelayLogError with the difference that goodReplica1 +// is 
not replicating to start with (IO_Thread is not running) and we +// simulate an error from the attempt to start replication +func TestPlannedReparentShardRelayLogErrorStartReplication(t *testing.T) { ts := memorytopo.NewServer("cell1") wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) vp := NewVtctlPipe(t, ts) @@ -556,7 +561,7 @@ func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { defer master.StopActionLoop(t) master.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) - // good replica 1 is not replicating + // goodReplica1 is not replicating goodReplica1.FakeMysqlDaemon.ReadOnly = true goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.IOThreadRunning = false @@ -574,7 +579,7 @@ func TestPlannedReparentShardRelayLogErrorStartSlave(t *testing.T) { defer goodReplica1.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", master.Tablet.Keyspace + "/" + master.Tablet.Shard, "-new_master", topoproto.TabletAliasString(master.Tablet.Alias)}) require.NoError(t, err) // check what was run @@ -679,7 +684,7 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) assert.Error(t, err) 
assert.Contains(t, err.Error(), "some error") @@ -711,7 +716,7 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { } // run PlannedReparentShard - err = vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) + err = vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", newMaster.Tablet.Keyspace + "/" + newMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(newMaster.Tablet.Alias)}) require.NoError(t, err) // check that mastership changed correctly @@ -778,7 +783,7 @@ func TestPlannedReparentShardSameMaster(t *testing.T) { defer goodReplica2.StopActionLoop(t) // run PlannedReparentShard - err := vp.Run([]string{"PlannedReparentShard", "-wait_slave_timeout", "10s", "-keyspace_shard", oldMaster.Tablet.Keyspace + "/" + oldMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(oldMaster.Tablet.Alias)}) + err := vp.Run([]string{"PlannedReparentShard", "-wait_replicas_timeout", "10s", "-keyspace_shard", oldMaster.Tablet.Keyspace + "/" + oldMaster.Tablet.Shard, "-new_master", topoproto.TabletAliasString(oldMaster.Tablet.Alias)}) require.NoError(t, err) assert.False(t, oldMaster.FakeMysqlDaemon.ReadOnly, "oldMaster.FakeMysqlDaemon.ReadOnly") }