diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go index 513076b8f68..f2042ea38df 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go @@ -22,18 +22,17 @@ import ( "sync" "time" - "vitess.io/vitess/go/mysql/replication" - "vitess.io/vitess/go/vt/log" - "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" - "vitess.io/vitess/go/event" + "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/sets" "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/concurrency" + "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/topotools/events" + "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" "vitess.io/vitess/go/vt/vtctl/reparentutil/promotionrule" "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vttablet/tmclient" @@ -154,7 +153,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve shardInfo *topo.ShardInfo prevPrimary *topodatapb.Tablet tabletMap map[string]*topo.TabletInfo - validCandidates map[string]replication.Position + validCandidates map[string]*RelayLogPositions intermediateSource *topodatapb.Tablet validCandidateTablets []*topodatapb.Tablet validReplacementCandidates []*topodatapb.Tablet @@ -330,7 +329,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve func (erp *EmergencyReparenter) waitForAllRelayLogsToApply( ctx context.Context, - validCandidates map[string]replication.Position, + validCandidates map[string]*RelayLogPositions, tabletMap map[string]*topo.TabletInfo, statusMap map[string]*replicationdatapb.StopReplicationStatus, waitReplicasTimeout time.Duration, @@ -396,7 +395,7 @@ func (erp *EmergencyReparenter) waitForAllRelayLogsToApply( // findMostAdvanced finds the intermediate source for ERS. We always choose the most advanced one from our valid candidates list. Further ties are broken by looking at the promotion rules. func (erp *EmergencyReparenter) findMostAdvanced( - validCandidates map[string]replication.Position, + validCandidates map[string]*RelayLogPositions, tabletMap map[string]*topo.TabletInfo, opts EmergencyReparentOptions, ) (*topodatapb.Tablet, []*topodatapb.Tablet, error) { @@ -791,11 +790,11 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb // The caller of this function (ERS) will then choose from among the list of candidate tablets, based on higher-level criteria. func (erp *EmergencyReparenter) findErrantGTIDs( ctx context.Context, - validCandidates map[string]replication.Position, + validCandidates map[string]*RelayLogPositions, statusMap map[string]*replicationdatapb.StopReplicationStatus, tabletMap map[string]*topo.TabletInfo, waitReplicasTimeout time.Duration, -) (map[string]replication.Position, error) { +) (map[string]*RelayLogPositions, error) { // First we need to collect the reparent journal length for all the candidates. // This will tell us, which of the tablets are severly lagged, and haven't even seen all the primary promotions. // Such severely lagging tablets cannot be used to find errant GTIDs in other tablets, seeing that they themselves don't have enough information. @@ -820,8 +819,13 @@ func (erp *EmergencyReparenter) findErrantGTIDs( // We use all the candidates with the maximum length of the reparent journal to find the errant GTIDs amongst them. var maxLenPositions []replication.Position - updatedValidCandidates := make(map[string]replication.Position) + updatedValidCandidates := make(map[string]*RelayLogPositions) for _, candidate := range maxLenCandidates { + candidatePositions := validCandidates[candidate] + if candidatePositions == nil || candidatePositions.IsZero() { + continue + } + status, ok := statusMap[candidate] if !ok { // If the tablet is not in the status map, and has the maximum length of the reparent journal, @@ -834,7 +838,7 @@ func (erp *EmergencyReparenter) findErrantGTIDs( // 4. During this ERS call, both A and B are seen online. They would both report being primary tablets with the same reparent journal length. // Even in this case, the best we can do is not run errant GTID detection on either, and let the split brain detection code // deal with it, if A in fact has errant GTIDs. - maxLenPositions = append(maxLenPositions, validCandidates[candidate]) + maxLenPositions = append(maxLenPositions, candidatePositions.Combined) updatedValidCandidates[candidate] = validCandidates[candidate] continue } @@ -844,7 +848,10 @@ func (erp *EmergencyReparenter) findErrantGTIDs( if otherCandidate == candidate { continue } - otherPositions = append(otherPositions, validCandidates[otherCandidate]) + otherPosition := validCandidates[otherCandidate] + if otherPosition != nil || !otherPosition.IsZero() { + otherPositions = append(otherPositions, otherPosition.Combined) + } } // Run errant GTID detection and throw away any tablet that has errant GTIDs. afterStatus := replication.ProtoToReplicationStatus(status.After) @@ -856,7 +863,7 @@ func (erp *EmergencyReparenter) findErrantGTIDs( log.Errorf("skipping %v with GTIDSet:%v because we detected errant GTIDs - %v", candidate, afterStatus.RelayLogPosition.GTIDSet, errantGTIDs) continue } - maxLenPositions = append(maxLenPositions, validCandidates[candidate]) + maxLenPositions = append(maxLenPositions, candidatePositions.Combined) updatedValidCandidates[candidate] = validCandidates[candidate] } @@ -882,7 +889,7 @@ func (erp *EmergencyReparenter) findErrantGTIDs( // This exact scenario outlined above, can be found in the test for this function, subtest `Case 5a`. // The idea is that if the tablet is lagged, then even the server UUID that it is replicating from // should not be considered a valid source of writes that no other tablet has. - errantGTIDs, err := replication.FindErrantGTIDs(validCandidates[alias], replication.SID{}, maxLenPositions) + errantGTIDs, err := replication.FindErrantGTIDs(validCandidates[alias].Combined, replication.SID{}, maxLenPositions) if err != nil { return nil, err } @@ -899,7 +906,7 @@ func (erp *EmergencyReparenter) findErrantGTIDs( // gatherReparenJournalInfo reads the reparent journal information from all the tablets in the valid candidates list. func (erp *EmergencyReparenter) gatherReparenJournalInfo( ctx context.Context, - validCandidates map[string]replication.Position, + validCandidates map[string]*RelayLogPositions, tabletMap map[string]*topo.TabletInfo, waitReplicasTimeout time.Duration, ) (map[string]int32, error) { diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go index 529c2a8f366..fdc080156c5 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go @@ -2402,7 +2402,7 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { tests := []struct { name string tmc *testutil.TabletManagerClient - candidates map[string]replication.Position + candidates map[string]*RelayLogPositions tabletMap map[string]*topo.TabletInfo statusMap map[string]*replicationdatapb.StopReplicationStatus shouldErr bool @@ -2419,7 +2419,7 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { }, }, }, - candidates: map[string]replication.Position{ + candidates: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, }, @@ -2467,7 +2467,7 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { }, }, }, - candidates: map[string]replication.Position{ + candidates: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, }, @@ -2518,7 +2518,7 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { }, }, }, - candidates: map[string]replication.Position{ + candidates: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, "zone1-0000000102": {}, @@ -2583,7 +2583,7 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) { }, }, }, - candidates: map[string]replication.Position{ + candidates: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, }, @@ -2793,26 +2793,57 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { Sequence: 11, } - positionMostAdvanced := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID1) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID2) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID3) + // most advanced gtid set + positionMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID2) + + // same combined gtid set as positionMostAdvanced, but 1 position behind in gtid executed + positionAlmostMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionAlmostMostAdvanced.Executed.GTIDSet = positionAlmostMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate1 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate1.GTIDSet = positionIntermediate1.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate1 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate1.Combined.GTIDSet = positionIntermediate1.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate1.Executed.GTIDSet = positionIntermediate1.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate2 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate2 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate2.Executed.GTIDSet = positionIntermediate2.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionOnly2 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionOnly2.GTIDSet = positionOnly2.GTIDSet.AddGTID(mysqlGTID2) + positionOnly2 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionOnly2.Combined.GTIDSet = positionOnly2.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionOnly2.Executed.GTIDSet = positionOnly2.Executed.GTIDSet.AddGTID(mysqlGTID2) - positionEmpty := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} + positionEmpty := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } tests := []struct { name string - validCandidates map[string]replication.Position + validCandidates map[string]*RelayLogPositions tabletMap map[string]*topo.TabletInfo emergencyReparentOps EmergencyReparentOptions result *topodatapb.Tablet @@ -2820,10 +2851,11 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { }{ { name: "choose most advanced", - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": positionMostAdvanced, "zone1-0000000101": positionIntermediate1, "zone1-0000000102": positionIntermediate2, + "zone1-0000000103": positionAlmostMostAdvanced, }, tabletMap: map[string]*topo.TabletInfo{ "zone1-0000000100": { @@ -2850,6 +2882,14 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { }, }, }, + "zone1-0000000103": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + }, + }, "zone1-0000000404": { Tablet: &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ @@ -2868,10 +2908,11 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { }, }, { name: "choose most advanced with the best promotion rule", - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": positionMostAdvanced, "zone1-0000000101": positionIntermediate1, "zone1-0000000102": positionMostAdvanced, + "zone1-0000000103": positionAlmostMostAdvanced, }, tabletMap: map[string]*topo.TabletInfo{ "zone1-0000000100": { @@ -2900,6 +2941,14 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { Type: topodatapb.TabletType_RDONLY, }, }, + "zone1-0000000103": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + }, + }, "zone1-0000000404": { Tablet: &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ @@ -2922,10 +2971,11 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { Cell: "zone1", Uid: 102, }}, - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": positionMostAdvanced, "zone1-0000000101": positionIntermediate1, "zone1-0000000102": positionMostAdvanced, + "zone1-0000000103": positionAlmostMostAdvanced, }, tabletMap: map[string]*topo.TabletInfo{ "zone1-0000000100": { @@ -2954,6 +3004,14 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { Type: topodatapb.TabletType_RDONLY, }, }, + "zone1-0000000103": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + }, + }, "zone1-0000000404": { Tablet: &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ @@ -2976,7 +3034,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) { Cell: "zone1", Uid: 102, }}, - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": positionOnly2, "zone1-0000000101": positionIntermediate1, "zone1-0000000102": positionEmpty, diff --git a/go/vt/vtctl/reparentutil/reparent_sorter.go b/go/vt/vtctl/reparentutil/reparent_sorter.go index 2f9c3c9ea8d..9ba8c5d466c 100644 --- a/go/vt/vtctl/reparentutil/reparent_sorter.go +++ b/go/vt/vtctl/reparentutil/reparent_sorter.go @@ -19,7 +19,6 @@ package reparentutil import ( "sort" - "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" "vitess.io/vitess/go/vt/vterrors" @@ -31,13 +30,13 @@ import ( // candidate for intermediate promotion in emergency reparent shard, and the new primary in planned reparent shard type reparentSorter struct { tablets []*topodatapb.Tablet - positions []replication.Position + positions []*RelayLogPositions innodbBufferPool []int durability policy.Durabler } // newReparentSorter creates a new reparentSorter -func newReparentSorter(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability policy.Durabler) *reparentSorter { +func newReparentSorter(tablets []*topodatapb.Tablet, positions []*RelayLogPositions, innodbBufferPool []int, durability policy.Durabler) *reparentSorter { return &reparentSorter{ tablets: tablets, positions: positions, @@ -72,11 +71,15 @@ func (rs *reparentSorter) Less(i, j int) bool { return true } - if !rs.positions[i].AtLeast(rs.positions[j]) { + // sort by combined positions. if equal, also sort by the executed GTID positions. + jPositions := rs.positions[j] + iPositions := rs.positions[i] + + if !iPositions.AtLeast(jPositions) { // [i] does not have all GTIDs that [j] does return false } - if !rs.positions[j].AtLeast(rs.positions[i]) { + if !jPositions.AtLeast(iPositions) { // [j] does not have all GTIDs that [i] does return true } @@ -101,7 +104,7 @@ func (rs *reparentSorter) Less(i, j int) bool { // sortTabletsForReparent sorts the tablets, given their positions for emergency reparent shard and planned reparent shard. // Tablets are sorted first by their replication positions, with ties broken by the promotion rules. -func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability policy.Durabler) error { +func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []*RelayLogPositions, innodbBufferPool []int, durability policy.Durabler) error { // throw an error internal error in case of unequal number of tablets and positions // fail-safe code prevents panic in sorting in case the lengths are unequal if len(tablets) != len(positions) { diff --git a/go/vt/vtctl/reparentutil/reparent_sorter_test.go b/go/vt/vtctl/reparentutil/reparent_sorter_test.go index 86aa129f1a4..5464de20f18 100644 --- a/go/vt/vtctl/reparentutil/reparent_sorter_test.go +++ b/go/vt/vtctl/reparentutil/reparent_sorter_test.go @@ -53,6 +53,13 @@ func TestReparentSorter(t *testing.T) { }, Type: topodatapb.TabletType_REPLICA, } + tabletReplica3_103 := &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: cell1, + Uid: 103, + }, + Type: topodatapb.TabletType_REPLICA, + } tabletRdonly1_102 := &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ Cell: cell1, @@ -74,65 +81,90 @@ func TestReparentSorter(t *testing.T) { Sequence: 11, } - positionMostAdvanced := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID1) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID2) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID3) + positionMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID2) - positionEmpty := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} + positionAlmostMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionAlmostMostAdvanced.Executed.GTIDSet = positionAlmostMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate1 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate1.GTIDSet = positionIntermediate1.GTIDSet.AddGTID(mysqlGTID1) + positionEmpty := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } - positionIntermediate2 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate1 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate1.Combined.GTIDSet = positionIntermediate1.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate1.Executed.GTIDSet = positionIntermediate1.Executed.GTIDSet.AddGTID(mysqlGTID1) + + positionIntermediate2 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate2.Executed.GTIDSet = positionIntermediate2.Executed.GTIDSet.AddGTID(mysqlGTID1) testcases := []struct { name string tablets []*topodatapb.Tablet innodbBufferPool []int - positions []replication.Position + positions []*RelayLogPositions containsErr string sortedTablets []*topodatapb.Tablet }{ { name: "all advanced, sort via promotion rules", tablets: []*topodatapb.Tablet{nil, tabletReplica1_100, tabletRdonly1_102}, - positions: []replication.Position{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced}, + positions: []*RelayLogPositions{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced}, sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletRdonly1_102, nil}, }, { name: "all advanced, sort via innodb buffer pool", tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100}, - positions: []replication.Position{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced}, + positions: []*RelayLogPositions{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced}, innodbBufferPool: []int{10, 40, 25}, sortedTablets: []*topodatapb.Tablet{tabletReplica2_100, tabletReplica1_100, tabletReplica1_101}, }, { name: "ordering by position", - tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102}, - positions: []replication.Position{positionEmpty, positionIntermediate1, positionIntermediate2, positionMostAdvanced}, - sortedTablets: []*topodatapb.Tablet{tabletRdonly1_102, tabletReplica1_100, tabletReplica2_100, tabletReplica1_101}, + tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102, tabletReplica3_103}, + positions: []*RelayLogPositions{positionEmpty, positionIntermediate1, positionIntermediate2, positionMostAdvanced, positionAlmostMostAdvanced}, + sortedTablets: []*topodatapb.Tablet{tabletRdonly1_102, tabletReplica3_103, tabletReplica1_100, tabletReplica2_100, tabletReplica1_101}, }, { name: "tablets and positions count error", tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100}, - positions: []replication.Position{positionEmpty, positionIntermediate1, positionMostAdvanced}, + positions: []*RelayLogPositions{positionEmpty, positionIntermediate1, positionMostAdvanced}, containsErr: "unequal number of tablets and positions", }, { name: "promotion rule check", tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletRdonly1_102}, - positions: []replication.Position{positionMostAdvanced, positionMostAdvanced}, + positions: []*RelayLogPositions{positionMostAdvanced, positionMostAdvanced}, sortedTablets: []*topodatapb.Tablet{tabletReplica1_101, tabletRdonly1_102}, }, { name: "mixed", - tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102}, - positions: []replication.Position{positionEmpty, positionIntermediate1, positionMostAdvanced, positionIntermediate1}, - sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletReplica2_100, tabletRdonly1_102, tabletReplica1_101}, + tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102, tabletReplica3_103}, + positions: []*RelayLogPositions{positionEmpty, positionIntermediate1, positionMostAdvanced, positionIntermediate1, positionAlmostMostAdvanced}, + sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletReplica3_103, tabletReplica2_100, tabletRdonly1_102, tabletReplica1_101}, }, { name: "mixed - another", - tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102}, - positions: []replication.Position{positionIntermediate1, positionIntermediate1, positionMostAdvanced, positionIntermediate1}, - innodbBufferPool: []int{100, 200, 0, 200}, - sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletReplica2_100, tabletReplica1_101, tabletRdonly1_102}, + tablets: []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102, tabletReplica3_103}, + positions: []*RelayLogPositions{positionIntermediate1, positionIntermediate1, positionMostAdvanced, positionIntermediate1, positionAlmostMostAdvanced}, + innodbBufferPool: []int{100, 200, 0, 200, 200}, + sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletReplica3_103, tabletReplica2_100, tabletReplica1_101, tabletRdonly1_102}, }, } diff --git a/go/vt/vtctl/reparentutil/replication.go b/go/vt/vtctl/reparentutil/replication.go index 81211fac4cd..096cb7166ee 100644 --- a/go/vt/vtctl/reparentutil/replication.go +++ b/go/vt/vtctl/reparentutil/replication.go @@ -40,15 +40,57 @@ import ( "vitess.io/vitess/go/vt/vttablet/tmclient" ) +// RelayLogPositions contains the positions of the relay log. +type RelayLogPositions struct { + // Combined represents the entire range of the relay + // log with the retrieved + executed GTID sets + // combined. + Combined replication.Position + + // Executed represents the executed GTID set of the + // relay log/SQL thread. + Executed replication.Position +} + +// AtLeast returns true if the RelayLogPositions object contains at least the positions provided +// as pos. If the combined positions are equal, prioritize the position where more events have +// been executed/applied, as this avoids picking tablets with SQL delay (intended or not) that +// can delay/timeout the reparent. Otherwise, pick the larger of the two combined positions as +// it contains more changes, irrespective of how many changes are executed/applied. +func (rlp *RelayLogPositions) AtLeast(pos *RelayLogPositions) bool { + if pos == nil { + return false + } + + if rlp.Combined.Equal(pos.Combined) { + return rlp.Executed.AtLeast(pos.Executed) + } + return rlp.Combined.AtLeast(pos.Combined) +} + +// Equal returns true if the RelayLogPositions object is equal to +// the positions provided as pos. +func (rlp *RelayLogPositions) Equal(pos *RelayLogPositions) bool { + if pos == nil { + return false + } + return rlp.Combined.Equal(pos.Combined) && rlp.Executed.Equal(pos.Executed) +} + +// IsZero returns true if the RelayLogPositions is zero. +func (rlp *RelayLogPositions) IsZero() bool { + return rlp.Combined.IsZero() +} + // FindPositionsOfAllCandidates will find candidates for an emergency // reparent, and, if successful, return a mapping of those tablet aliases (as // raw strings) to their replication positions for later comparison. func FindPositionsOfAllCandidates( statusMap map[string]*replicationdatapb.StopReplicationStatus, primaryStatusMap map[string]*replicationdatapb.PrimaryStatus, -) (map[string]replication.Position, bool, error) { +) (map[string]*RelayLogPositions, bool, error) { replicationStatusMap := make(map[string]*replication.ReplicationStatus, len(statusMap)) - positionMap := make(map[string]replication.Position) + positionMap := make(map[string]*RelayLogPositions) // Build out replication status list from proto types. for alias, statuspb := range statusMap { @@ -90,11 +132,14 @@ func FindPositionsOfAllCandidates( // Store the final positions in the map. for alias, status := range replicationStatusMap { if !isGTIDBased { - positionMap[alias] = status.Position + positionMap[alias] = &RelayLogPositions{Combined: status.Position} continue } - positionMap[alias] = status.RelayLogPosition + positionMap[alias] = &RelayLogPositions{ + Combined: status.RelayLogPosition, + Executed: status.Position, + } } for alias, primaryStatus := range primaryStatusMap { @@ -103,7 +148,7 @@ func FindPositionsOfAllCandidates( return nil, false, vterrors.Wrapf(err, "could not decode a primary status executed position for tablet %v: %v", alias, err) } - positionMap[alias] = executedPosition + positionMap[alias] = &RelayLogPositions{Combined: executedPosition} } return positionMap, isGTIDBased, nil diff --git a/go/vt/vtctl/reparentutil/replication_test.go b/go/vt/vtctl/reparentutil/replication_test.go index 3d124b7495b..be0b47e7246 100644 --- a/go/vt/vtctl/reparentutil/replication_test.go +++ b/go/vt/vtctl/reparentutil/replication_test.go @@ -25,16 +25,15 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "vitess.io/vitess/go/mysql/replication" - "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" - _flag "vitess.io/vitess/go/internal/flag" "vitess.io/vitess/go/mysql" + "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/sets" "vitess.io/vitess/go/vt/logutil" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/topotools/events" + "vitess.io/vitess/go/vt/vtctl/reparentutil/policy" "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vttablet/tmclient" @@ -1502,3 +1501,72 @@ func TestWaitForRelayLogsToApply(t *testing.T) { }) } } + +func TestRelayLogPositions_AtLeast(t *testing.T) { + gtidSet1, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6") + gtidSet2, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5") + gtidSet3, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-3") + gtidSet4, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-2") + + rlp := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: gtidSet1}, + Executed: replication.Position{GTIDSet: gtidSet3}, + } + + // rlp is equal + assert.True(t, rlp.AtLeast(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: rlp.Combined.GTIDSet}, + Executed: replication.Position{GTIDSet: rlp.Executed.GTIDSet}, + })) + + // rlp is less advanced + assert.False(t, rlp.AtLeast(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: gtidSet1}, + Executed: replication.Position{GTIDSet: gtidSet2}, + })) + + // rlp is more advanced + assert.True(t, rlp.AtLeast(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: gtidSet2}, + Executed: replication.Position{GTIDSet: gtidSet4}, + })) +} + +func TestRelayLogPositions_Equal(t *testing.T) { + gtidSet1, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6") + gtidSet2, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5") + gtidSet3, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-3") + + rlp := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: gtidSet1}, + Executed: replication.Position{GTIDSet: gtidSet2}, + } + + // rlp is not equal + assert.False(t, rlp.Equal(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: gtidSet2}, + Executed: replication.Position{GTIDSet: gtidSet3}, + })) + + // rlp is partially equal + assert.False(t, rlp.Equal(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: rlp.Combined.GTIDSet}, + Executed: replication.Position{GTIDSet: gtidSet3}, + })) + + // rlp is equal + assert.True(t, rlp.Equal(&RelayLogPositions{ + Combined: replication.Position{GTIDSet: rlp.Combined.GTIDSet}, + Executed: replication.Position{GTIDSet: rlp.Executed.GTIDSet}, + })) +} + +func TestRelayLogPositions_IsZero(t *testing.T) { + gtidSet, _ := replication.ParseMysql56GTIDSet("3e11fa47-71ca-11e1-9e33-c80aa9429562:1-6") + rlp := &RelayLogPositions{} + assert.True(t, rlp.IsZero()) + + rlp.Combined = replication.Position{GTIDSet: gtidSet} + rlp.Executed = replication.Position{GTIDSet: gtidSet} + assert.False(t, rlp.IsZero()) +} diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go index 5d2f2e4fddf..3ec5a907ff5 100644 --- a/go/vt/vtctl/reparentutil/util.go +++ b/go/vt/vtctl/reparentutil/util.go @@ -86,7 +86,7 @@ func ElectNewPrimary( mu sync.Mutex // tablets that are possible candidates to be the new primary and their positions validTablets []*topodatapb.Tablet - tabletPositions []replication.Position + tabletPositions []*RelayLogPositions innodbBufferPool []int errorGroup, groupCtx = errgroup.WithContext(ctx) ) @@ -167,9 +167,11 @@ func ElectNewPrimary( return validTablets[0].Alias, nil } -// findTabletPositionLagBackupStatus processes the replication position and lag for a single tablet and +// findTabletPositionLagBackupStatus processes the replication positions and lag for a single tablet and // returns it. It is safe to call from multiple goroutines. -func findTabletPositionLagBackupStatus(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (replication.Position, time.Duration, bool, bool, error) { +func findTabletPositionLagBackupStatus(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (*RelayLogPositions, time.Duration, bool, bool, error) { + rlp := &RelayLogPositions{} + logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias)) ctx, cancel := context.WithTimeout(ctx, waitTimeout) @@ -180,24 +182,25 @@ func findTabletPositionLagBackupStatus(ctx context.Context, tablet *topodatapb.T sqlErr, isSQLErr := sqlerror.NewSQLErrorFromError(err).(*sqlerror.SQLError) if isSQLErr && sqlErr != nil && sqlErr.Number() == sqlerror.ERNotReplica { logger.Warningf("no replication statue from %v, using empty gtid set", topoproto.TabletAliasString(tablet.Alias)) - return replication.Position{}, 0, false, false, nil + return rlp, 0, false, false, nil } logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err) - return replication.Position{}, 0, false, false, err + return rlp, 0, false, false, err } - // Use the relay log position if available, otherwise use the executed GTID set (binary log position). - positionString := status.Position - if status.RelayLogPosition != "" { - positionString = status.RelayLogPosition + rlp.Executed, err = replication.DecodePosition(status.Position) + if err != nil { + logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", status.Position, topoproto.TabletAliasString(tablet.Alias), err) + return rlp, 0, status.BackupRunning, false, err } - pos, err := replication.DecodePosition(positionString) + + rlp.Combined, err = replication.DecodePosition(status.RelayLogPosition) if err != nil { - logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", positionString, topoproto.TabletAliasString(tablet.Alias), err) - return replication.Position{}, 0, status.BackupRunning, false, err + logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", status.RelayLogPosition, topoproto.TabletAliasString(tablet.Alias), err) + return rlp, 0, status.BackupRunning, false, err } - return pos, time.Second * time.Duration(status.ReplicationLagSeconds), status.BackupRunning, status.ReplicationLagUnknown, nil + return rlp, time.Second * time.Duration(status.ReplicationLagSeconds), status.BackupRunning, status.ReplicationLagUnknown, nil } // FindCurrentPrimary returns the current primary tablet of a shard, if any. The @@ -299,9 +302,9 @@ func ShardReplicationStatuses(ctx context.Context, ts *topo.Server, tmc tmclient } // getValidCandidatesAndPositionsAsList converts the valid candidates from a map to a list of tablets, making it easier to sort -func getValidCandidatesAndPositionsAsList(validCandidates map[string]replication.Position, tabletMap map[string]*topo.TabletInfo) ([]*topodatapb.Tablet, []replication.Position, error) { +func getValidCandidatesAndPositionsAsList(validCandidates map[string]*RelayLogPositions, tabletMap map[string]*topo.TabletInfo) ([]*topodatapb.Tablet, []*RelayLogPositions, error) { var validTablets []*topodatapb.Tablet - var tabletPositions []replication.Position + var tabletPositions []*RelayLogPositions for tabletAlias, position := range validCandidates { tablet, isFound := tabletMap[tabletAlias] if !isFound { @@ -314,8 +317,8 @@ func getValidCandidatesAndPositionsAsList(validCandidates map[string]replication } // restrictValidCandidates is used to restrict some candidates from being considered eligible for becoming the intermediate source or the final promotion candidate -func restrictValidCandidates(validCandidates map[string]replication.Position, tabletMap map[string]*topo.TabletInfo) (map[string]replication.Position, error) { - restrictedValidCandidates := make(map[string]replication.Position) +func restrictValidCandidates(validCandidates map[string]*RelayLogPositions, tabletMap map[string]*topo.TabletInfo) (map[string]*RelayLogPositions, error) { + restrictedValidCandidates := make(map[string]*RelayLogPositions) for candidate, position := range validCandidates { candidateInfo, ok := tabletMap[candidate] if !ok { diff --git a/go/vt/vtctl/reparentutil/util_test.go b/go/vt/vtctl/reparentutil/util_test.go index 5bded8e226c..9e7d26b15e5 100644 --- a/go/vt/vtctl/reparentutil/util_test.go +++ b/go/vt/vtctl/reparentutil/util_test.go @@ -755,7 +755,8 @@ func TestElectNewPrimary(t *testing.T) { tmc: &chooseNewPrimaryTestTMClient{ replicationStatuses: map[string]*replicationdatapb.Status{ "zone1-0000000101": { - Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-2", + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-2", + RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-2", }, "zone1-0000000102": { Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1", @@ -1176,6 +1177,7 @@ func TestFindPositionForTablet(t *testing.T) { "zone1-0000000100": { Position: &replicationdatapb.Status{ Position: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", + RelayLogPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", ReplicationLagSeconds: 201, }, }, @@ -1199,6 +1201,7 @@ func TestFindPositionForTablet(t *testing.T) { "zone1-0000000100": { Position: &replicationdatapb.Status{ Position: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", + RelayLogPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", ReplicationLagSeconds: 201, }, }, @@ -1243,7 +1246,7 @@ func TestFindPositionForTablet(t *testing.T) { }{ "zone1-0000000100": { Position: &replicationdatapb.Status{ - Position: "unused", + Position: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-2", RelayLogPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", ReplicationLagSeconds: 291, }, @@ -1288,6 +1291,7 @@ func TestFindPositionForTablet(t *testing.T) { }{ "zone1-0000000100": { Position: &replicationdatapb.Status{ + Position: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", RelayLogPosition: "MySQL56/3e11fa47-71ca-11e1-9e33-c80aa9429562:1-5", ReplicationLagUnknown: true, }, @@ -1314,7 +1318,7 @@ func TestFindPositionForTablet(t *testing.T) { return } require.NoError(t, err) - posString := replication.EncodePosition(pos) + posString := replication.EncodePosition(pos.Combined) require.Equal(t, test.expectedPosition, posString) require.Equal(t, test.expectedLag, lag) require.Equal(t, test.expectedTakingBackup, takingBackup) @@ -1506,30 +1510,53 @@ func TestGetValidCandidatesAndPositionsAsList(t *testing.T) { Sequence: 11, } - positionMostAdvanced := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID1) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID2) - positionMostAdvanced.GTIDSet = positionMostAdvanced.GTIDSet.AddGTID(mysqlGTID3) + positionMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionMostAdvanced.Combined.GTIDSet = positionMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) + positionMostAdvanced.Executed.GTIDSet = positionMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID2) + + positionAlmostMostAdvanced := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionAlmostMostAdvanced.Combined.GTIDSet = positionAlmostMostAdvanced.Combined.GTIDSet.AddGTID(mysqlGTID3) + positionAlmostMostAdvanced.Executed.GTIDSet = positionAlmostMostAdvanced.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate1 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate1.GTIDSet = positionIntermediate1.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate1 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate1.Combined.GTIDSet = positionIntermediate1.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate1.Executed.GTIDSet = positionIntermediate1.Executed.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate2 := replication.Position{GTIDSet: replication.Mysql56GTIDSet{}} - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID1) - positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate2 := &RelayLogPositions{ + Combined: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + Executed: replication.Position{GTIDSet: replication.Mysql56GTIDSet{}}, + } + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID1) + positionIntermediate2.Combined.GTIDSet = positionIntermediate2.Combined.GTIDSet.AddGTID(mysqlGTID2) + positionIntermediate2.Executed.GTIDSet = positionIntermediate2.Executed.GTIDSet.AddGTID(mysqlGTID2) tests := []struct { name string - validCandidates map[string]replication.Position + validCandidates map[string]*RelayLogPositions tabletMap map[string]*topo.TabletInfo tabletRes []*topodatapb.Tablet }{ { name: "test conversion", - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": positionMostAdvanced, "zone1-0000000101": positionIntermediate1, "zone1-0000000102": positionIntermediate2, + "zone1-0000000103": positionAlmostMostAdvanced, }, tabletMap: map[string]*topo.TabletInfo{ "zone1-0000000100": { @@ -1558,6 +1585,15 @@ func TestGetValidCandidatesAndPositionsAsList(t *testing.T) { Hostname: "requires force start", }, }, + "zone1-0000000103": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + Hostname: "2nd primary-elect", + }, + }, "zone1-0000000404": { Tablet: &topodatapb.Tablet{ Alias: &topodatapb.TabletAlias{ @@ -1575,6 +1611,12 @@ func TestGetValidCandidatesAndPositionsAsList(t *testing.T) { Uid: 100, }, Hostname: "primary-elect", + }, { + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + Hostname: "2nd primary-elect", }, { Alias: &topodatapb.TabletAlias{ Cell: "zone1", @@ -1724,13 +1766,13 @@ func TestWaitForCatchUp(t *testing.T) { func TestRestrictValidCandidates(t *testing.T) { tests := []struct { name string - validCandidates map[string]replication.Position + validCandidates map[string]*RelayLogPositions tabletMap map[string]*topo.TabletInfo - result map[string]replication.Position + result map[string]*RelayLogPositions }{ { name: "remove invalid tablets", - validCandidates: map[string]replication.Position{ + validCandidates: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, "zone1-0000000102": {}, @@ -1794,7 +1836,7 @@ func TestRestrictValidCandidates(t *testing.T) { }, }, }, - result: map[string]replication.Position{ + result: map[string]*RelayLogPositions{ "zone1-0000000100": {}, "zone1-0000000101": {}, "zone1-0000000104": {},