diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go
index 1ad55715985..ca4d0dcc3c2 100644
--- a/go/vt/vtctl/reparentutil/emergency_reparenter.go
+++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go
@@ -351,17 +351,44 @@ func (erp *EmergencyReparenter) waitForAllRelayLogsToApply(
 	groupCtx, groupCancel := context.WithTimeout(ctx, opts.WaitReplicasTimeout)
 	defer groupCancel()
 
+	waiterCount := 0
+
 	for candidate := range validCandidates {
+		// When we called StopReplicationAndBuildStatusMaps, we got back two
+		// maps: (1) the StopReplicationStatus of any replicas that actually
+		// stopped replication; and (2) the MasterStatus of anything that
+		// returned ErrNotReplica, which is a tablet that is either the current
+		// primary or is stuck thinking it is a MASTER but is not in actuality.
+		//
+		// If we have a tablet in the validCandidates map that does not appear
+		// in the statusMap, then we have either (a) the current primary, which
+		// is not replicating, so it is not applying relay logs; or (b) a tablet
+		// that is stuck thinking it is MASTER but is not in actuality. In that
+		// second case - (b) - we will most likely find that the stuck MASTER
+		// does not have a winning position, and fail the ERS. If, on the other
+		// hand, it does have a winning position, we are trusting the operator
+		// to know what they are doing by emergency-reparenting onto that
+		// tablet. In either case, it does not make sense to wait for relay logs
+		// to apply on a tablet that was never applying relay logs in the first
+		// place, so we skip it, and log that we did.
+		status, ok := statusMap[candidate]
+		if !ok {
+			erp.logger.Infof("EmergencyReparent candidate %v not in replica status map; this means it was not running replication (because it was formerly MASTER), so skipping WaitForRelayLogsToApply step for this candidate", candidate)
+			continue
+		}
+
 		go func(alias string) {
 			var err error
 			defer func() { errCh <- err }()
-			err = WaitForRelayLogsToApply(groupCtx, erp.tmc, tabletMap[alias], statusMap[alias])
+			err = WaitForRelayLogsToApply(groupCtx, erp.tmc, tabletMap[alias], status)
 		}(candidate)
+
+		waiterCount++
 	}
 
 	errgroup := concurrency.ErrorGroup{
-		NumGoroutines:        len(validCandidates),
-		NumRequiredSuccesses: len(validCandidates),
+		NumGoroutines:        waiterCount,
+		NumRequiredSuccesses: waiterCount,
 		NumAllowedErrors:     0,
 	}
 	rec := errgroup.Wait(groupCancel, errCh)
diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
index 95ba0b9e885..4f1d7170cab 100644
--- a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
+++ b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go
@@ -338,6 +338,111 @@ func TestEmergencyReparenter_reparentShardLocked(t *testing.T) {
 			},
 			shouldErr: false,
 		},
+		{
+			name: "success with existing primary",
+			ts:   memorytopo.NewServer("zone1"),
+			tmc: &emergencyReparenterTestTMClient{
+				DemoteMasterResults: map[string]struct {
+					Status *replicationdatapb.MasterStatus
+					Error  error
+				}{
+					"zone1-0000000100": {
+						Status: &replicationdatapb.MasterStatus{
+							Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
+						},
+					},
+				},
+				PopulateReparentJournalResults: map[string]error{
+					"zone1-0000000102": nil,
+				},
+				PromoteReplicaResults: map[string]struct {
+					Result string
+					Error  error
+				}{
+					"zone1-0000000102": {
+						Result: "ok",
+						Error:  nil,
+					},
+				},
+				SetMasterResults: map[string]error{
+					"zone1-0000000100": nil,
+					"zone1-0000000101": nil,
+				},
+				StopReplicationAndGetStatusResults: map[string]struct {
+					Status     *replicationdatapb.Status
+					StopStatus *replicationdatapb.StopReplicationStatus
+					Error      error
+				}{
+					"zone1-0000000100": { // This tablet claims MASTER, so is not running replication.
+						Error: mysql.ErrNotReplica,
+					},
+					"zone1-0000000101": {
+						StopStatus: &replicationdatapb.StopReplicationStatus{
+							Before: &replicationdatapb.Status{},
+							After: &replicationdatapb.Status{
+								MasterUuid:       "3E11FA47-71CA-11E1-9E33-C80AA9429562",
+								RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21",
+							},
+						},
+					},
+					"zone1-0000000102": {
+						StopStatus: &replicationdatapb.StopReplicationStatus{
+							Before: &replicationdatapb.Status{},
+							After: &replicationdatapb.Status{
+								MasterUuid:       "3E11FA47-71CA-11E1-9E33-C80AA9429562",
+								RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-26",
+							},
+						},
+					},
+				},
+				WaitForPositionResults: map[string]map[string]error{
+					"zone1-0000000101": {
+						"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-21": nil,
+					},
+					"zone1-0000000102": {
+						"MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-26": nil,
+					},
+				},
+			},
+			shards: []*vtctldatapb.Shard{
+				{
+					Keyspace: "testkeyspace",
+					Name:     "-",
+				},
+			},
+			tablets: []*topodatapb.Tablet{
+				{
+					Alias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  100,
+					},
+					Keyspace: "testkeyspace",
+					Shard:    "-",
+					Type:     topodatapb.TabletType_MASTER,
+				},
+				{
+					Alias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  101,
+					},
+					Keyspace: "testkeyspace",
+					Shard:    "-",
+				},
+				{
+					Alias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  102,
+					},
+					Keyspace: "testkeyspace",
+					Shard:    "-",
+					Hostname: "most up-to-date position, wins election",
+				},
+			},
+			keyspace:  "testkeyspace",
+			shard:     "-",
+			opts:      EmergencyReparentOptions{},
+			shouldErr: false,
+		},
 		{
 			name: "shard not found",
 			ts:   memorytopo.NewServer("zone1"),
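For readers skimming the first hunk, the following is a minimal standalone sketch, not part of the patch, of the skip-and-count pattern it introduces: candidates absent from statusMap get no waiter goroutine, and the wait is sized by waiterCount (the number of goroutines actually started) rather than len(validCandidates). All names and types here are simplified stand-ins; the real code uses the Vitess tabletmanager client and concurrency.ErrorGroup.

package main

import "fmt"

func main() {
	validCandidates := []string{"zone1-0000000100", "zone1-0000000101", "zone1-0000000102"}

	// statusMap only contains tablets that were actually replicating; the
	// former primary (zone1-0000000100) is deliberately absent, mirroring
	// the ErrNotReplica case described in the patch comment.
	statusMap := map[string]string{
		"zone1-0000000101": "relay-pos-a",
		"zone1-0000000102": "relay-pos-b",
	}

	errCh := make(chan error)
	waiterCount := 0

	for _, candidate := range validCandidates {
		status, ok := statusMap[candidate]
		if !ok {
			fmt.Printf("skipping %s: was not running replication\n", candidate)
			continue
		}

		go func(alias, relayPos string) {
			// Stand-in for WaitForRelayLogsToApply; always succeeds here.
			fmt.Printf("%s caught up to %s\n", alias, relayPos)
			errCh <- nil
		}(candidate, status)

		waiterCount++
	}

	// Collect exactly waiterCount results. Sizing this loop (and, in the
	// real patch, the error group) by waiterCount rather than
	// len(validCandidates) is what keeps the wait from hanging forever
	// when a candidate was skipped.
	for i := 0; i < waiterCount; i++ {
		if err := <-errCh; err != nil {
			fmt.Println("waiter failed:", err)
		}
	}
}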