diff --git a/go/vt/mysqlctl/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon.go index 9cf64914f67..10877a4d6d1 100644 --- a/go/vt/mysqlctl/fakemysqldaemon.go +++ b/go/vt/mysqlctl/fakemysqldaemon.go @@ -458,7 +458,6 @@ func (fmd *FakeMysqlDaemon) SetReplicationSource(ctx context.Context, host strin if stopReplicationBefore { cmds = append(cmds, "STOP SLAVE") } - cmds = append(cmds, "RESET SLAVE ALL") cmds = append(cmds, "FAKE SET MASTER") if startReplicationAfter { cmds = append(cmds, "START SLAVE") diff --git a/go/vt/mysqlctl/replication.go b/go/vt/mysqlctl/replication.go index 86317980d05..e180cd1e9c9 100644 --- a/go/vt/mysqlctl/replication.go +++ b/go/vt/mysqlctl/replication.go @@ -461,14 +461,6 @@ func (mysqld *Mysqld) SetReplicationSource(ctx context.Context, host string, por if stopReplicationBefore { cmds = append(cmds, conn.StopReplicationCommand()) } - // Reset replication parameters commands makes the instance forget the source host port - // This is required because sometimes MySQL gets stuck due to improper initialization of - // master info structure or related failures and throws errors like - // ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log - // These errors can only be resolved by resetting the replication parameters, otherwise START SLAVE fails. - // Therefore, we have elected to always reset the replication parameters whenever we try to set the source host port - // Since there is no real overhead, but it makes this function robust enough to also handle failures like these. - cmds = append(cmds, conn.ResetReplicationParametersCommands()...) smc := conn.SetReplicationSourceCommand(params, host, port, int(replicationConnectRetry.Seconds())) cmds = append(cmds, smc) if startReplicationAfter { diff --git a/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go b/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go index c1e5899aac6..fb1f1f5b40e 100644 --- a/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go +++ b/go/vt/vtctl/grpcvtctldserver/endtoend/init_shard_primary_test.go @@ -61,13 +61,11 @@ func TestInitShardPrimary(t *testing.T) { tablet2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ // These come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // These come from InitShardPrimary "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -75,12 +73,10 @@ func TestInitShardPrimary(t *testing.T) { tablet3.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -125,7 +121,6 @@ func TestInitShardPrimaryNoFormerPrimary(t *testing.T) { tablet2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -134,7 +129,6 @@ func TestInitShardPrimaryNoFormerPrimary(t *testing.T) { tablet3.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE RESET ALL REPLICATION", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go index 2ffccd89289..df7a50d04b0 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication.go @@ -697,14 +697,8 @@ func (tm *TabletManager) setReplicationSourceLocked(ctx context.Context, parentA } host := parent.Tablet.MysqlHostname port := parent.Tablet.MysqlPort - // We want to reset the replication parameters and set replication source again when forceStartReplication is provided - // because sometimes MySQL gets stuck due to improper initialization of master info structure or related failures and throws errors like - // ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log - // These errors can only be resolved by resetting the replication parameters, otherwise START SLAVE fails. So when this RPC - // gets called from VTOrc or replication manager to fix the replication in these cases with forceStartReplication, we should also - // reset the replication parameters and set the source port information again. - if status.SourceHost != host || status.SourcePort != port || forceStartReplication { - // This handles reseting the replication parameters, changing the address and then starting the replication. + if status.SourceHost != host || status.SourcePort != port { + // This handles both changing the address and starting replication. if err := tm.MysqlDaemon.SetReplicationSource(ctx, host, port, wasReplicating, shouldbeReplicating); err != nil { if err := tm.handleRelayLogError(err); err != nil { return err @@ -960,11 +954,17 @@ func (tm *TabletManager) fixSemiSyncAndReplication(tabletType topodatapb.TabletT return nil } +// handleRelayLogError resets replication of the instance. +// This is required because sometimes MySQL gets stuck due to improper initialization of +// master info structure or related failures and throws errors like +// ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log +// These errors can only be resolved by resetting the replication, otherwise START SLAVE fails. func (tm *TabletManager) handleRelayLogError(err error) error { // attempt to fix this error: // Slave failed to initialize relay log info structure from the repository (errno 1872) (sqlstate HY000) during query: START SLAVE // see https://bugs.mysql.com/bug.php?id=83713 or https://github.com/vitessio/vitess/issues/5067 - if strings.Contains(err.Error(), "Slave failed to initialize relay log info structure from the repository") { + // The same fix also works for https://github.com/vitessio/vitess/issues/10955. + if strings.Contains(err.Error(), "Slave failed to initialize relay log info structure from the repository") || strings.Contains(err.Error(), "Could not initialize master info structure") { // Stop, reset and start replication again to resolve this error if err := tm.MysqlDaemon.RestartReplication(tm.hookExtraEnv()); err != nil { return err diff --git a/go/vt/vttablet/tabletmanager/tm_init_test.go b/go/vt/vttablet/tabletmanager/tm_init_test.go index 5812d1ddd17..ccaedf0c272 100644 --- a/go/vt/vttablet/tabletmanager/tm_init_test.go +++ b/go/vt/vttablet/tabletmanager/tm_init_test.go @@ -374,7 +374,6 @@ func TestCheckPrimaryShip(t *testing.T) { fakeMysql.SetReplicationSourceInputs = append(fakeMysql.SetReplicationSourceInputs, fmt.Sprintf("%v:%v", otherTablet.MysqlHostname, otherTablet.MysqlPort)) fakeMysql.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/backup_test.go b/go/vt/wrangler/testlib/backup_test.go index b845a6d7104..f65ba2f6df3 100644 --- a/go/vt/wrangler/testlib/backup_test.go +++ b/go/vt/wrangler/testlib/backup_test.go @@ -175,9 +175,8 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { }, } sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -188,7 +187,6 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -230,16 +228,14 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -296,7 +292,6 @@ func testBackupRestore(t *testing.T, cDetails *compressionDetails) error { "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -417,9 +412,8 @@ func TestBackupRestoreLagged(t *testing.T) { } sourceTablet.FakeMysqlDaemon.SetReplicationSourceInputs = []string{fmt.Sprintf("%s:%d", primary.Tablet.MysqlHostname, primary.Tablet.MysqlPort)} sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -430,7 +424,6 @@ func TestBackupRestoreLagged(t *testing.T) { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -488,16 +481,14 @@ func TestBackupRestoreLagged(t *testing.T) { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -638,9 +629,8 @@ func TestRestoreUnreachablePrimary(t *testing.T) { } sourceTablet.FakeMysqlDaemon.SetReplicationSourceInputs = []string{fmt.Sprintf("%s:%d", primary.Tablet.MysqlHostname, primary.Tablet.MysqlPort)} sourceTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // This first set of STOP and START commands come from @@ -651,7 +641,6 @@ func TestRestoreUnreachablePrimary(t *testing.T) { // These commands come from SetReplicationSource RPC called // to set the correct primary and semi-sync after Backup has concluded "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -681,16 +670,14 @@ func TestRestoreUnreachablePrimary(t *testing.T) { }, } destTablet.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", "RESET SLAVE ALL", "FAKE SET SLAVE POSITION", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/copy_schema_shard_test.go b/go/vt/wrangler/testlib/copy_schema_shard_test.go index b32e2c3d937..c3e574fea42 100644 --- a/go/vt/wrangler/testlib/copy_schema_shard_test.go +++ b/go/vt/wrangler/testlib/copy_schema_shard_test.go @@ -72,9 +72,8 @@ func copySchema(t *testing.T, useShardAsSource bool) { sourceRdonly := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_RDONLY, sourceRdonlyDb, TabletKeyspaceShard(t, "ks", "-80")) sourceRdonly.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go index a68f0299c94..859cd606fdb 100644 --- a/go/vt/wrangler/testlib/emergency_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/emergency_reparent_shard_test.go @@ -134,14 +134,12 @@ func TestEmergencyReparentShard(t *testing.T) { goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition) goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -167,12 +165,10 @@ func TestEmergencyReparentShard(t *testing.T) { goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition) goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -234,7 +230,6 @@ func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) { newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -267,14 +262,12 @@ func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) { moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions = append(moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition) newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition) moreAdvancedReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE IO_THREAD", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/external_reparent_test.go b/go/vt/wrangler/testlib/external_reparent_test.go index 5d47ca5724e..955515ac4b4 100644 --- a/go/vt/wrangler/testlib/external_reparent_test.go +++ b/go/vt/wrangler/testlib/external_reparent_test.go @@ -90,7 +90,6 @@ func TestTabletExternallyReparentedBasic(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -170,7 +169,6 @@ func TestTabletExternallyReparentedToReplica(t *testing.T) { // primary is still good to go. oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -249,7 +247,6 @@ func TestTabletExternallyReparentedWithDifferentMysqlPort(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -262,9 +259,8 @@ func TestTabletExternallyReparentedWithDifferentMysqlPort(t *testing.T) { // TabletActionReplicaWasRestarted and point to the new mysql port goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -339,7 +335,6 @@ func TestTabletExternallyReparentedContinueOnUnexpectedPrimary(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -352,9 +347,8 @@ func TestTabletExternallyReparentedContinueOnUnexpectedPrimary(t *testing.T) { // TabletActionReplicaWasRestarted and point to a bad host goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -425,7 +419,6 @@ func TestTabletExternallyReparentedRerun(t *testing.T) { oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START Replica", } @@ -438,9 +431,8 @@ func TestTabletExternallyReparentedRerun(t *testing.T) { // On the good replica, we will respond to // TabletActionReplicaWasRestarted. goodReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/permissions_test.go b/go/vt/wrangler/testlib/permissions_test.go index fec85ca3124..9d2a950c8e3 100644 --- a/go/vt/wrangler/testlib/permissions_test.go +++ b/go/vt/wrangler/testlib/permissions_test.go @@ -563,9 +563,8 @@ func TestPermissions(t *testing.T) { } replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go index b0f2cf54f4a..010c1dc2301 100644 --- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go @@ -81,7 +81,6 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -96,11 +95,9 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // we end up calling SetReplicationSource twice on the old primary - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -116,13 +113,11 @@ func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -198,7 +193,6 @@ func TestPlannedReparentShardNoError(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -213,11 +207,9 @@ func TestPlannedReparentShardNoError(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // we end up calling SetReplicationSource twice on the old primary - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -233,13 +225,11 @@ func TestPlannedReparentShardNoError(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -250,12 +240,10 @@ func TestPlannedReparentShardNoError(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -337,7 +325,6 @@ func TestPlannedReparentInitialization(t *testing.T) { goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -350,7 +337,6 @@ func TestPlannedReparentInitialization(t *testing.T) { goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) goodReplica2.StartActionLoop(t, wr) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", } defer goodReplica2.StopActionLoop(t) @@ -421,7 +407,6 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -436,7 +421,6 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.PromoteResult oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -451,13 +435,11 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -468,12 +450,10 @@ func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -534,7 +514,6 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -548,7 +527,6 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -563,13 +541,11 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -580,12 +556,10 @@ func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -643,9 +617,8 @@ func TestPlannedReparentShardRelayLogError(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // simulate error that will trigger a call to RestartReplication @@ -726,9 +699,8 @@ func TestPlannedReparentShardRelayLogErrorStartReplication(t *testing.T) { goodReplica1.FakeMysqlDaemon.CurrentSourcePort = primary.Tablet.MysqlPort goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ // simulate error that will trigger a call to RestartReplication - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // In SetReplicationSource, we find that the source host and port was already set correctly, @@ -807,7 +779,6 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { } newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", @@ -822,7 +793,6 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -837,13 +807,11 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -854,12 +822,10 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) @@ -880,22 +846,18 @@ func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { newPrimary.FakeMysqlDaemon.PromoteError = nil newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // extra commands because of retry "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", } oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", // extra commands because of retry - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -954,13 +916,11 @@ func TestPlannedReparentShardSamePrimary(t *testing.T) { goodReplica1.FakeMysqlDaemon.Replicating = true goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -971,12 +931,10 @@ func TestPlannedReparentShardSamePrimary(t *testing.T) { goodReplica2.FakeMysqlDaemon.ReadOnly = true goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", } goodReplica2.StartActionLoop(t, wr) diff --git a/go/vt/wrangler/testlib/reparent_utils_test.go b/go/vt/wrangler/testlib/reparent_utils_test.go index 5596d120e8c..edd10f6dfdd 100644 --- a/go/vt/wrangler/testlib/reparent_utils_test.go +++ b/go/vt/wrangler/testlib/reparent_utils_test.go @@ -18,6 +18,7 @@ package testlib import ( "context" + "errors" "testing" "time" @@ -90,9 +91,8 @@ func TestShardReplicationStatuses(t *testing.T) { replica.FakeMysqlDaemon.CurrentSourcePort = primary.Tablet.MysqlPort replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -160,13 +160,11 @@ func TestReparentTablet(t *testing.T) { replica.FakeMysqlDaemon.IOThreadRunning = true replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", } @@ -184,3 +182,63 @@ func TestReparentTablet(t *testing.T) { } checkSemiSyncEnabled(t, false, true, replica) } + +// TestSetReplicationSourceRelayLogError tests that SetReplicationSource works as intended when we receive a relay log error while starting replication. +func TestSetReplicationSourceRelayLogError(t *testing.T) { + ctx := context.Background() + ts := memorytopo.NewServer("cell1", "cell2") + wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) + + // create shard and tablets + if _, err := ts.GetOrCreateShard(ctx, "test_keyspace", "0"); err != nil { + t.Fatalf("CreateShard failed: %v", err) + } + primary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_PRIMARY, nil) + replica := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) + reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") + + // mark the primary inside the shard + if _, err := ts.UpdateShardFields(ctx, "test_keyspace", "0", func(si *topo.ShardInfo) error { + si.PrimaryAlias = primary.Tablet.Alias + return nil + }); err != nil { + t.Fatalf("UpdateShardFields failed: %v", err) + } + + // primary action loop (to initialize host and port) + primary.StartActionLoop(t, wr) + defer primary.StopActionLoop(t) + + // replica loop + // We have to set the settings as replicating. Otherwise, + // the replication manager intervenes and tries to fix replication, + // which ends up making this test unpredictable. + replica.FakeMysqlDaemon.Replicating = true + replica.FakeMysqlDaemon.IOThreadRunning = true + replica.FakeMysqlDaemon.SetReplicationSourceInputs = append(replica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) + replica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ + // These 3 statements come from tablet startup + "STOP SLAVE", + "FAKE SET MASTER", + "START SLAVE", + // We stop and reset the replication parameters because of relay log issues. + "STOP SLAVE", + "RESET SLAVE", + "START SLAVE", + } + replica.StartActionLoop(t, wr) + defer replica.StopActionLoop(t) + + // Set the correct error message that indicates we have received a relay log error. + replica.FakeMysqlDaemon.SetReplicationSourceError = errors.New("ERROR 1201 (HY000): Could not initialize master info structure; more error messages can be found in the MySQL error log") + // run ReparentTablet + if err := wr.SetReplicationSource(ctx, replica.Tablet); err != nil { + t.Fatalf("SetReplicationSource failed: %v", err) + } + + // check what was run + if err := replica.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { + t.Fatalf("replica.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) + } + checkSemiSyncEnabled(t, false, true, replica) +} diff --git a/go/vt/wrangler/testlib/version_test.go b/go/vt/wrangler/testlib/version_test.go index 18b29d61ac2..c54a0811948 100644 --- a/go/vt/wrangler/testlib/version_test.go +++ b/go/vt/wrangler/testlib/version_test.go @@ -93,9 +93,8 @@ func TestVersion(t *testing.T) { sourceReplicaGitRev := "fake git rev" sourceReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(sourceReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(sourcePrimary.Tablet)) sourceReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ - // These 4 statements come from tablet startup + // These 3 statements come from tablet startup "STOP SLAVE", - "RESET SLAVE ALL", "FAKE SET MASTER", "START SLAVE", }