diff --git a/go/vt/vtctld/api_test.go b/go/vt/vtctld/api_test.go index 57d4e77a0e3..0d272d2f0cf 100644 --- a/go/vt/vtctld/api_test.go +++ b/go/vt/vtctld/api_test.go @@ -131,6 +131,7 @@ func TestAPI(t *testing.T) { {"GET", "shards/ks1/", "", `["-80","80-"]`}, {"GET", "shards/ks1/-80", "", `{ "master_alias": null, + "master_term_start_time":null, "key_range": { "start": null, "end":"gA==" diff --git a/go/vt/vttablet/tabletmanager/rpc_replication.go b/go/vt/vttablet/tabletmanager/rpc_replication.go index 6be2f0e14fe..8d5e6a43abf 100644 --- a/go/vt/vttablet/tabletmanager/rpc_replication.go +++ b/go/vt/vttablet/tabletmanager/rpc_replication.go @@ -576,11 +576,10 @@ func (agent *ActionAgent) setMasterLocked(ctx context.Context, parentAlias *topo // if needed, wait until we get the replicated row, or our // context times out - if !shouldbeReplicating || timeCreatedNS == 0 { - return nil - } - if err := agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS); err != nil { - return err + if shouldbeReplicating && timeCreatedNS != 0 { + if err := agent.MysqlDaemon.WaitForReparentJournal(ctx, timeCreatedNS); err != nil { + return err + } } if typeChanged { if err := agent.refreshTablet(ctx, "SetMaster"); err != nil { diff --git a/go/vt/vttablet/tabletmanager/shard_sync.go b/go/vt/vttablet/tabletmanager/shard_sync.go index a5676673138..9141396d2cf 100644 --- a/go/vt/vttablet/tabletmanager/shard_sync.go +++ b/go/vt/vttablet/tabletmanager/shard_sync.go @@ -67,12 +67,15 @@ func (agent *ActionAgent) shardSyncLoop(ctx context.Context) { select { case <-notifyChan: // Something may have changed in the tablet state. + log.Info("Change to tablet state") case <-retryChan: // It's time to retry a previous failed sync attempt. + log.Info("Retry sync") case event := <-shardWatch.watchChan: // Something may have changed in the shard record. // We don't use the watch event except to know that we should // re-read the shard record, and to know if the watch dies. + log.Info("Change in shard record") if event.Err != nil { // The watch failed. Stop it so we start a new one if needed. log.Errorf("Shard watch failed: %v", event.Err) diff --git a/go/vt/wrangler/reparent.go b/go/vt/wrangler/reparent.go index 34fa2c9ba9a..1badf670787 100644 --- a/go/vt/wrangler/reparent.go +++ b/go/vt/wrangler/reparent.go @@ -438,11 +438,13 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R return fmt.Errorf("old master tablet %v DemoteMaster failed: %v", topoproto.TabletAliasString(shardInfo.MasterAlias), err) } + promoteCtx, promoteCancel := context.WithTimeout(ctx, waitSlaveTimeout) + defer promoteCancel() // Wait on the master-elect tablet until it reaches that position, // then promote it. wr.logger.Infof("promote replica %v", masterElectTabletAliasStr) event.DispatchUpdate(ev, "promoting replica") - rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(remoteCtx, masterElectTabletInfo.Tablet, rp) + rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(promoteCtx, masterElectTabletInfo.Tablet, rp) if err != nil || (ctx.Err() != nil && ctx.Err() == context.DeadlineExceeded) { remoteCancel() // If we fail to promote the new master, try to roll back to the @@ -457,9 +459,6 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R reparentJournalPos = rp } - remoteCtx, remoteCancel = context.WithTimeout(ctx, waitSlaveTimeout) - defer remoteCancel() - // Check we still have the topology lock. if err := topo.CheckShardLocked(ctx, keyspace, shard); err != nil { return fmt.Errorf("lost topology lock, aborting: %v", err) @@ -520,18 +519,6 @@ func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.R return fmt.Errorf("failed to PopulateReparentJournal on master: %v", err) } - // After the master is done, we can update the shard record. - // TODO(deepthi): Remove this when we make the master tablet responsible for - // updating the shard record. - wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias) - if _, err := wr.ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { - si.MasterAlias = masterElectTabletAlias - return nil - }); err != nil { - wgReplicas.Wait() - return fmt.Errorf("failed to update shard master record: %v", err) - } - // Wait for the replicas to complete. wgReplicas.Wait() if err := rec.Error(); err != nil { diff --git a/go/vt/wrangler/testlib/planned_reparent_shard_test.go b/go/vt/wrangler/testlib/planned_reparent_shard_test.go index 7b8254c6ee3..5dbd7fb2b33 100644 --- a/go/vt/wrangler/testlib/planned_reparent_shard_test.go +++ b/go/vt/wrangler/testlib/planned_reparent_shard_test.go @@ -81,6 +81,9 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { oldMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", "START SLAVE", + // we end up calling SetMaster twice on the old master + "FAKE SET MASTER", + "START SLAVE", } oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) @@ -103,7 +106,7 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { t.Fatalf("PlannedReparentShard failed: %v", err) } - // // check what was run + // check what was run if err := newMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil { t.Errorf("newMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err) } @@ -126,8 +129,8 @@ func TestPlannedReparentShardNoMasterProvided(t *testing.T) { t.Errorf("oldMaster...QueryServiceControl not serving") } - // // verify the old master was told to start replicating (and not - // // the slave that wasn't replicating in the first place) + // verify the old master was told to start replicating (and not + // the slave that wasn't replicating in the first place) if !oldMaster.FakeMysqlDaemon.Replicating { t.Errorf("oldMaster.FakeMysqlDaemon.Replicating not set") } @@ -188,6 +191,9 @@ func TestPlannedReparentShard(t *testing.T) { oldMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ "FAKE SET MASTER", "START SLAVE", + // we end up calling SetMaster twice on the old master + "FAKE SET MASTER", + "START SLAVE", } oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t)