diff --git a/pkg/raft/confchange/validate.go b/pkg/raft/confchange/validate.go index 8bb5a124f55d..ae34821b3385 100644 --- a/pkg/raft/confchange/validate.go +++ b/pkg/raft/confchange/validate.go @@ -18,6 +18,7 @@ type ValidationContext struct { CurConfig *quorum.Config Applied uint64 PendingConfIndex uint64 + LeadSupportSafe bool DisableValidationAgainstCurConfig bool } @@ -35,6 +36,17 @@ func ValidateProp(ctx ValidationContext, cc pb.ConfChangeV2) error { ctx.PendingConfIndex, ctx.Applied) } + // If the lead support has not caught up from the previous configuration, we + // must not propose another configuration change. Doing so would compromise + // the lead support promise made by the previous configuration and used as an + // expiration of a leader lease. Instead, we wait for the lead support under + // the current configuration to catch up to the maximum lead support reached + // under the previous config. If the lead support is never able to catch up, + // the leader will eventually step down due to CheckQuorum. + if !ctx.LeadSupportSafe { + return errors.Errorf("lead support has not caught up to previous configuration") + } + // The DisableValidationAgainstCurConfig flag allows disabling config change // constraints that are guaranteed by the upper state machine layer (incorrect // ones will apply as no-ops). See raft.Config.DisableConfChangeValidation for diff --git a/pkg/raft/confchange/validate_test.go b/pkg/raft/confchange/validate_test.go index d4e95bba65f4..30197ce5c186 100644 --- a/pkg/raft/confchange/validate_test.go +++ b/pkg/raft/confchange/validate_test.go @@ -40,6 +40,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -49,6 +50,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 10, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -58,16 +60,29 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 15, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "possible unapplied conf change at index 15 \\(applied to 10\\)", }, + { + name: "invalid, unsafe lead support", + ctx: ValidationContext{ + CurConfig: configNormal, + Applied: 10, + PendingConfIndex: 5, + LeadSupportSafe: false, + }, + cc: changeNormal, + expErr: "lead support has not caught up to previous configuration", + }, { name: "invalid, already in joint state", ctx: ValidationContext{ CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "must transition out of joint config first", @@ -78,6 +93,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeLeaveJoint, expErr: "not in joint state; refusing empty conf change", @@ -88,6 +104,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeNormal, @@ -98,6 +115,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeLeaveJoint, @@ -108,6 +126,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -121,6 +140,7 @@ func 
TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -133,6 +153,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -146,6 +167,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 3}, @@ -159,6 +181,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ @@ -172,6 +195,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 732a02b88bb1..76eaa33e11cb 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1634,6 +1634,7 @@ func stepLeader(r *raft, m pb.Message) error { CurConfig: &r.config, Applied: r.raftLog.applied, PendingConfIndex: r.pendingConfIndex, + LeadSupportSafe: r.fortificationTracker.ConfigChangeSafe(), DisableValidationAgainstCurConfig: r.disableConfChangeValidation, } if err := confchange.ValidateProp(ccCtx, cc.AsV2()); err != nil { diff --git a/pkg/raft/testdata/confchange_fortification_safety.txt b/pkg/raft/testdata/confchange_fortification_safety.txt new file mode 100644 index 000000000000..e7a481ece15a --- /dev/null +++ b/pkg/raft/testdata/confchange_fortification_safety.txt @@ -0,0 +1,398 @@ +# This test verifies that configuration changes are not permitted to compromise +# the safety of leader fortification. +# +# For details, see FortificationTracker.ConfigChangeSafe. +add-nodes 4 voters=(1, 2, 3) index=2 +---- +INFO 1 switched to configuration voters=(1 2 3) +INFO 1 became follower at term 0 +INFO newRaft 1 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 2 switched to configuration voters=(1 2 3) +INFO 2 became follower at term 0 +INFO newRaft 2 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 3 switched to configuration voters=(1 2 3) +INFO 3 became follower at term 0 +INFO newRaft 3 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 4 switched to configuration voters=(1 2 3) +INFO 4 became follower at term 0 +INFO newRaft 4 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] + +campaign 1 +---- +INFO 1 is starting a new election at term 0 +INFO 1 became candidate at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 3 at term 1 + +stabilize log-level=none +---- +ok + +# Withdraw store liveness support from 3 for 1 so that the lead support is +# fragile. +withdraw-support 3 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +# Add v4. 
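Before the walkthrough below proposes v4, here is the new gate exercised directly through the exported validation entry point. This is a minimal, hedged sketch rather than code from the change: the import paths assume the cockroachdb/cockroach raft fork, and the field values mirror the table-driven cases in validate_test.go above.

```go
package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/raft/confchange"
	"github.com/cockroachdb/cockroach/pkg/raft/quorum"
	pb "github.com/cockroachdb/cockroach/pkg/raft/raftpb"
)

func main() {
	// A plain three-voter config, analogous to configNormal in the unit test.
	cfg := quorum.MakeEmptyConfig()
	for _, id := range []pb.PeerID{1, 2, 3} {
		cfg.Voters[0][id] = struct{}{}
	}

	ctx := confchange.ValidationContext{
		CurConfig:        &cfg,
		Applied:          10,
		PendingConfIndex: 5,
		// In stepLeader this is populated from
		// fortificationTracker.ConfigChangeSafe(); false means lead support
		// under the current config still trails the previous config's maximum.
		LeadSupportSafe: false,
	}
	cc := pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{
		{Type: pb.ConfChangeAddNode, NodeID: 4},
	}}
	if err := confchange.ValidateProp(ctx, cc); err != nil {
		fmt.Println(err) // lead support has not caught up to previous configuration
	}
}
```

In stepLeader the same rejection shows up as the "ignoring conf change" log line seen in the testdata below, so a lagging lead support causes the leader to refuse the change until support catches up.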
+propose-conf-change 1 +v4 +---- +ok + +stabilize 1 2 3 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 + 1->3 MsgApp Term:1 Log:1/4 Commit:4 + INFO 1 switched to configuration voters=(1 2 3 4) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgFortifyLeader Term:1 Log:0/0 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 2 switched to configuration voters=(1 2 3 4) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 3 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 + +# Try to demote v2. This should be rejected because v1's lead support under the +# current config has not caught up to the previous config. +propose-conf-change 1 +r2 l2 +---- +INFO 1 ignoring conf change {ConfChangeTransitionAuto [{ConfChangeRemoveNode 2} {ConfChangeAddLearnerNode 2}] []} at config voters=(1 2 3 4): lead support has not caught up to previous configuration + +# Fortify v4 so that lead support catches up. 
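Before the fortification step below runs, the same situation can be seen at the tracker level. The following is a condensed, illustrative excerpt of the TestConfigChangeSafe cases added later in this diff; mockLiveness, makeMockLivenessEntry, ts, and the epoch/timestamp values all come from that test and are not new API.

```go
// Three fortified voters with support expirations ts(10), ts(15), ts(20):
// the quorum's lead support expires at ts(15), so leaderMaxSupported is 15.
ft := NewFortificationTracker(&cfg, mockLiveness, raftlogger.DiscardLogger)
ft.RecordFortification(1, 10)
ft.RecordFortification(2, 20)
ft.RecordFortification(3, 30)
_ = ft.LeadSupportUntil(pb.StateLeader) // ts(15)

// Add r4 as a voter. r4 is neither supporting the leader in store liveness
// nor fortified, so lead support recomputed over the new config regresses
// below 15, and a further config change must wait.
cfg.Voters[0][4] = struct{}{}
_ = ft.ConfigChangeSafe()               // false
_ = ft.LeadSupportUntil(pb.StateLeader) // still ts(15), clamped
```

This is why the r2 demotion above was rejected: v4 joined the voter set but has not yet fortified the leader, so the current config cannot yet promise the lead support that the previous config already did.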
+stabilize 1 4 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] +> 4 receiving messages + 1->4 MsgFortifyLeader Term:1 Log:0/0 + INFO 4 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1] + INFO 4 became follower at term 1 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] + DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:2 Lead:1 LeadEpoch:1 + Messages: + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 +> 1 receiving messages + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + DEBUG 1 received MsgAppResp(rejected, hint: (index 2, term 1)) from 4 for index 3 + DEBUG 1 decreased progress of 4 to [StateProbe match=0 next=3 sentCommit=2 matchCommit=2] +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:4 Lead:1 LeadEpoch:1 + Entries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + CommittedEntries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + Messages: + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + INFO 4 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 +4 : 1 + +# Try to demote v2 again. This time, the proposal should be allowed. 
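And the tracker-level counterpart of the successful retry below, continuing the sketch above with values from the "support caught up" case of TestConfigChangeSafe:

```go
// r4 starts supporting the leader's store with an expiration at the old
// maximum (ts(15)), and the leader records r4's fortification at that epoch.
mockLiveness.liveness[4] = makeMockLivenessEntry(40, ts(15))
ft.RecordFortification(4, 40)

// Lead support under the new config has caught up to the previous maximum,
// so another config change is admissible; LeadSupportUntil never regressed
// in the interim.
_ = ft.ConfigChangeSafe()               // true
_ = ft.LeadSupportUntil(pb.StateLeader) // ts(15)
```

Note that catching up does not require r4 specifically: the last two TestConfigChangeSafe cases show that sufficient support from the original voters alone also makes a further change safe again.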
+propose-conf-change 1 +r2 l2 +---- +ok + +stabilize +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 + INFO 1 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave + INFO initiating automatic transition out of joint configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 2 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 3 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 4 handling Ready + Ready 
MustSync=true: + HardState Term:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 4 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/7 Commit:7 + 1->3 MsgApp Term:1 Log:1/7 Commit:7 + 1->4 MsgApp Term:1 Log:1/7 Commit:7 + INFO 1 switched to configuration voters=(1 3 4) learners=(2) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/7 Commit:7 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/7 Commit:7 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/7 Commit:7 +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 2 switched to configuration voters=(1 3 4) learners=(2) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 3 switched to configuration voters=(1 3 4) learners=(2) +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 4 switched to configuration voters=(1 3 4) learners=(2) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 diff --git a/pkg/raft/tracker/fortificationtracker.go b/pkg/raft/tracker/fortificationtracker.go index 61872d3c4233..13755157d97a 100644 --- a/pkg/raft/tracker/fortificationtracker.go +++ b/pkg/raft/tracker/fortificationtracker.go @@ -148,6 +148,23 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta return ft.leaderMaxSupported.Load() } + // Compute the lead support using the current configuration and forward the + // leaderMaxSupported to avoid regressions when the configuration changes. 
+ leadSupportUntil := ft.computeLeadSupportUntil(state) + return ft.leaderMaxSupported.Forward(leadSupportUntil) +} + +// computeLeadSupportUntil computes the timestamp until which the leader is +// guaranteed fortification using the current quorum configuration. +// +// Unlike LeadSupportUntil, this computation does not provide a guarantee of +// monotonicity. Specifically, its result may regress after a configuration +// change. +func (ft *FortificationTracker) computeLeadSupportUntil(state pb.StateType) hlc.Timestamp { + if state != pb.StateLeader { + panic("computeLeadSupportUntil should only be called by the leader") + } + // TODO(arul): avoid this map allocation as we're calling LeadSupportUntil // from hot paths. supportExpMap := make(map[pb.PeerID]hlc.Timestamp) @@ -162,9 +179,7 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta supportExpMap[id] = curExp } } - - leadSupportUntil := ft.config.Voters.LeadSupportExpiration(supportExpMap) - return ft.leaderMaxSupported.Forward(leadSupportUntil) + return ft.config.Voters.LeadSupportExpiration(supportExpMap) } // CanDefortify returns whether the caller can safely[1] de-fortify the term @@ -184,6 +199,61 @@ func (ft *FortificationTracker) CanDefortify() bool { return ft.storeLiveness.SupportExpired(leaderMaxSupported) } +// ConfigChangeSafe returns whether it is safe to propose a configuration change +// or not, given the current state of lead support. +// +// If the lead support has not caught up from the previous configuration, we +// must not propose another configuration change. Doing so would compromise the +// lead support promise made by the previous configuration and used as an +// expiration of a leader lease. Instead, we wait for the lead support under the +// current configuration to catch up to the maximum lead support reached under +// the previous config. If the lead support is never able to catch up, the +// leader will eventually step down due to CheckQuorum. +// +// The following timeline illustrates the hazard that this check is guarding +// against: +// +// 1. configuration A=(r1, r2, r3), leader=r1 +// - lead_support=20 (r1=30, r2=20, r3=10) +// +// 2. config change #1 adds r4 to the group +// - configuration B=(r1, r2, r3, r4) +// +// 3. lead support appears to regress to 10 +// - lead_support=10 (r1=30, r2=20, r3=10, r4=0) +// +// 4. any majority quorum for leader election involves r1 or r2 +// - therefore, max lead support of 20 is “safe” +// - this is analogous to how the raft Leader Completeness invariant works +// even across config changes, using either (1) single addition/removal +// at-a-time changes, or (2) joint consensus. Either way, consecutive +// configs share overlapping majorities. +// +// 5. config change #2 adds r5 to the group +// - configuration C=(r1, r2, r3, r4, r5) +// +// 6. lead_support still at 10 +// - lead_support=10 (r1=30, r2=20, r3=10, r4=0, r5=0) +// - however, max lead support of 20 no longer “safe” +// +// 7. r3 can win election with support from r4 and r5 before time 20 +// - neither r1 nor r2 need to be involved +// - HAZARD! this could violate the original lead support promise +// +// To avoid this hazard, we must wait for the lead support under configuration B +// to catch up to the maximum lead support reached under configuration A before +// allowing the proposal of configuration C. This ensures that the overlapping +// majorities between subsequent configurations preserve the safety of lead +// support. 
+func (ft *FortificationTracker) ConfigChangeSafe() bool { + // A configuration change is only safe if the current configuration's lead + // support has caught up to the maximum lead support reached under the + // previous configuration, which is reflected in leaderMaxSupported. + // + // NB: Only run by the leader. + return ft.leaderMaxSupported.Load().LessEq(ft.computeLeadSupportUntil(pb.StateLeader)) +} + // QuorumActive returns whether the leader is currently supported by a quorum or // not. func (ft *FortificationTracker) QuorumActive() bool { diff --git a/pkg/raft/tracker/fortificationtracker_test.go b/pkg/raft/tracker/fortificationtracker_test.go index efd498f124da..896284e1d4c1 100644 --- a/pkg/raft/tracker/fortificationtracker_test.go +++ b/pkg/raft/tracker/fortificationtracker_test.go @@ -492,6 +492,136 @@ func TestCanDefortify(t *testing.T) { } } +// TestConfigChangeSafe tests whether a leader can safely propose a config +// change. +func TestConfigChangeSafe(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ts := func(ts int64) hlc.Timestamp { + return hlc.Timestamp{ + WallTime: ts, + } + } + + testCases := []struct { + afterConfigChange func(sl *mockStoreLiveness, tracker *FortificationTracker) + expConfigChangeSafe bool + expLeadSupportUntil hlc.Timestamp + }{ + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // Nothing. r4 not providing store liveness support or fortified. + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 providing store liveness support but not fortified. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified at earlier epoch. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 39) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, but support still lagging. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(15)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support beyond previous config. + sl.liveness[2] = makeMockLivenessEntry(20, ts(25)) + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers caught up. 
+ sl.liveness[1] = makeMockLivenessEntry(10, ts(15)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers beyond previous config. + sl.liveness[1] = makeMockLivenessEntry(10, ts(20)) + sl.liveness[2] = makeMockLivenessEntry(20, ts(20)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + } + + for _, tc := range testCases { + mockLiveness := makeMockStoreLiveness( + map[pb.PeerID]mockLivenessEntry{ + 1: makeMockLivenessEntry(10, ts(10)), + 2: makeMockLivenessEntry(20, ts(15)), + 3: makeMockLivenessEntry(30, ts(20)), + }, + ) + + cfg := quorum.MakeEmptyConfig() + for _, id := range []pb.PeerID{1, 2, 3} { + cfg.Voters[0][id] = struct{}{} + } + ft := NewFortificationTracker(&cfg, mockLiveness, raftlogger.DiscardLogger) + + // Fortify the leader before the configuration change. + ft.RecordFortification(1, 10) + ft.RecordFortification(2, 20) + ft.RecordFortification(3, 30) + require.Equal(t, ts(15), ft.LeadSupportUntil(pb.StateLeader)) + + // Perform a configuration change that adds r4 to the voter set. + cfg.Voters[0][4] = struct{}{} + + tc.afterConfigChange(&mockLiveness, ft) + + require.Equal(t, tc.expConfigChangeSafe, ft.ConfigChangeSafe()) + require.Equal(t, tc.expLeadSupportUntil, ft.LeadSupportUntil(pb.StateLeader)) + } +} + type mockLivenessEntry struct { epoch pb.Epoch ts hlc.Timestamp
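A closing note on the predicate itself: ConfigChangeSafe reduces to a single timestamp comparison against the monotonically clamped leaderMaxSupported. The snippet below is not tracker code, just a minimal illustration of that clamp-and-compare using the hlc package (import path assumed; the 20-vs-10 values echo the hazard timeline in the ConfigChangeSafe comment).

```go
package main

import (
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

func main() {
	prevMax := hlc.Timestamp{WallTime: 20}    // max lead support under the old config
	curSupport := hlc.Timestamp{WallTime: 10} // lead support recomputed under the new config

	// LeadSupportUntil never regresses: it forwards the stored maximum.
	reported := prevMax
	reported.Forward(curSupport)
	fmt.Println(reported) // still 20

	// ConfigChangeSafe: another change is allowed only once the new config's
	// own support has caught up to the old maximum.
	fmt.Println(prevMax.LessEq(curSupport)) // false until curSupport reaches 20
}
```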