From 8105ddbf232c2065944e7f3b721bc52b0a1bb2ea Mon Sep 17 00:00:00 2001
From: Nathan VanBenschoten
Date: Mon, 7 Oct 2024 22:55:43 -0400
Subject: [PATCH] raft: don't propose config changes with regressed lead support

Fixes #125355.

If the lead support has not caught up from the previous configuration, we
must not propose another configuration change. Doing so would compromise the
lead support promise made by the previous configuration and used as an
expiration of a leader lease. Instead, we wait for the lead support under the
current configuration to catch up to the maximum lead support reached under
the previous config. If the lead support is never able to catch up, the
leader will eventually step down due to CheckQuorum.

As a reminder, if we reject a config change, it will be reproposed shortly,
at which point the lead support will likely have caught up.

As a second reminder, we fortify both voters and learners. We also always add
new replicas as learners before promoting them to voters. The combination
here means that a new replica will commonly already be supporting the leader
by the time it is promoted to a voter and added to the lead support
computation.

Release note: None
---
 pkg/raft/confchange/validate.go               |  12 +
 pkg/raft/confchange/validate_test.go          |  24 +
 pkg/raft/raft.go                              |   1 +
 .../confchange_fortification_safety.txt       | 410 ++++++++++++++++++
 pkg/raft/tracker/fortificationtracker.go      |  76 +++-
 pkg/raft/tracker/fortificationtracker_test.go | 130 ++++++
 6 files changed, 650 insertions(+), 3 deletions(-)
 create mode 100644 pkg/raft/testdata/confchange_fortification_safety.txt

diff --git a/pkg/raft/confchange/validate.go b/pkg/raft/confchange/validate.go
index 8bb5a124f55d..ae34821b3385 100644
--- a/pkg/raft/confchange/validate.go
+++ b/pkg/raft/confchange/validate.go
@@ -18,6 +18,7 @@ type ValidationContext struct {
 	CurConfig                         *quorum.Config
 	Applied                           uint64
 	PendingConfIndex                  uint64
+	LeadSupportSafe                   bool
 	DisableValidationAgainstCurConfig bool
 }
 
@@ -35,6 +36,17 @@ func ValidateProp(ctx ValidationContext, cc pb.ConfChangeV2) error {
 			ctx.PendingConfIndex, ctx.Applied)
 	}
 
+	// If the lead support has not caught up from the previous configuration, we
+	// must not propose another configuration change. Doing so would compromise
+	// the lead support promise made by the previous configuration and used as an
+	// expiration of a leader lease. Instead, we wait for the lead support under
+	// the current configuration to catch up to the maximum lead support reached
+	// under the previous config. If the lead support is never able to catch up,
+	// the leader will eventually step down due to CheckQuorum.
+	if !ctx.LeadSupportSafe {
+		return errors.Errorf("lead support has not caught up to previous configuration")
+	}
+
 	// The DisableValidationAgainstCurConfig flag allows disabling config change
 	// constraints that are guaranteed by the upper state machine layer (incorrect
 	// ones will apply as no-ops).
See raft.Config.DisableConfChangeValidation for diff --git a/pkg/raft/confchange/validate_test.go b/pkg/raft/confchange/validate_test.go index d4e95bba65f4..30197ce5c186 100644 --- a/pkg/raft/confchange/validate_test.go +++ b/pkg/raft/confchange/validate_test.go @@ -40,6 +40,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -49,6 +50,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 10, + LeadSupportSafe: true, }, cc: changeNormal, }, @@ -58,16 +60,29 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 15, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "possible unapplied conf change at index 15 \\(applied to 10\\)", }, + { + name: "invalid, unsafe lead support", + ctx: ValidationContext{ + CurConfig: configNormal, + Applied: 10, + PendingConfIndex: 5, + LeadSupportSafe: false, + }, + cc: changeNormal, + expErr: "lead support has not caught up to previous configuration", + }, { name: "invalid, already in joint state", ctx: ValidationContext{ CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeNormal, expErr: "must transition out of joint config first", @@ -78,6 +93,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: changeLeaveJoint, expErr: "not in joint state; refusing empty conf change", @@ -88,6 +104,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configJoint, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeNormal, @@ -98,6 +115,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: changeLeaveJoint, @@ -108,6 +126,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -121,6 +140,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -133,6 +153,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeRemoveNode, NodeID: 3}, @@ -146,6 +167,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ {Type: pb.ConfChangeAddLearnerNode, NodeID: 3}, @@ -159,6 +181,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ @@ -172,6 +195,7 @@ func TestValidateProp(t *testing.T) { CurConfig: configNormal, Applied: 10, PendingConfIndex: 5, + LeadSupportSafe: true, DisableValidationAgainstCurConfig: true, }, cc: pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{ diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index b4296ccbda9e..3c9e2b1829c0 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -1634,6 +1634,7 @@ 
func stepLeader(r *raft, m pb.Message) error { CurConfig: &r.config, Applied: r.raftLog.applied, PendingConfIndex: r.pendingConfIndex, + LeadSupportSafe: r.fortificationTracker.ConfigChangeSafe(), DisableValidationAgainstCurConfig: r.disableConfChangeValidation, } if err := confchange.ValidateProp(ccCtx, cc.AsV2()); err != nil { diff --git a/pkg/raft/testdata/confchange_fortification_safety.txt b/pkg/raft/testdata/confchange_fortification_safety.txt new file mode 100644 index 000000000000..444abb78ff60 --- /dev/null +++ b/pkg/raft/testdata/confchange_fortification_safety.txt @@ -0,0 +1,410 @@ +# This test verifies that configuration changes are not permitted to compromise +# the safety of leader fortification. +# +# For details, see FortificationTracker.ConfigChangeSafe. +add-nodes 4 voters=(1, 2, 3) index=2 +---- +INFO 1 switched to configuration voters=(1 2 3) +INFO 1 became follower at term 0 +INFO newRaft 1 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 2 switched to configuration voters=(1 2 3) +INFO 2 became follower at term 0 +INFO newRaft 2 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 3 switched to configuration voters=(1 2 3) +INFO 3 became follower at term 0 +INFO newRaft 3 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] +INFO 4 switched to configuration voters=(1 2 3) +INFO 4 became follower at term 0 +INFO newRaft 4 [peers: [1,2,3], term: 0, commit: 2, applied: 2, lastindex: 2, lastterm: 1] + +campaign 1 +---- +INFO 1 is starting a new election at term 0 +INFO 1 became candidate at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 2 at term 1 +INFO 1 [logterm: 1, index: 2] sent MsgVote request to 3 at term 1 + +stabilize log-level=none +---- +ok + +# Withdraw store liveness support from 3 for 1 so that the lead support is +# fragile. +withdraw-support 3 1 +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +# Add v4. 
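+#
+# Note that v4 is added directly as a voter, rather than as a learner that is
+# later promoted, so that lead support under the new configuration can lag
+# behind the maximum promised under the old one (v4 has not yet fortified the
+# leader).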
+propose-conf-change 1 +v4 +---- +ok + +stabilize 1 2 3 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/3 Commit:3 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:3 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:3 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 + 1->3 MsgApp Term:1 Log:1/4 Commit:4 + INFO 1 switched to configuration voters=(1 2 3 4) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgFortifyLeader Term:1 Log:0/0 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 2 switched to configuration voters=(1 2 3 4) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:4 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/4 EntryConfChangeV2 v4 + Messages: + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + INFO 3 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/4 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/4 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 + +# Try to demote v2. This should be rejected because v1's lead support under the +# current config has not caught up to the previous config. +propose-conf-change 1 +r2 l2 +---- +INFO 1 ignoring conf change {ConfChangeTransitionAuto [{ConfChangeRemoveNode 2} {ConfChangeAddLearnerNode 2}] []} at config voters=(1 2 3 4): lead support has not caught up to previous configuration + +# Fortify v4 so that lead support catches up. 
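+#
+# Stabilizing 1 and 4 delivers the pending MsgFortifyLeader to 4 and its
+# MsgFortifyLeaderResp back to 1, after which all four voters are fortifying
+# the leader.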
+stabilize 1 4 +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + Messages: + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] +> 4 receiving messages + 1->4 MsgFortifyLeader Term:1 Log:0/0 + INFO 4 [term: 0] received a MsgFortifyLeader message with higher term from 1 [term: 1] + INFO 4 became follower at term 1 + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[1/4 EntryConfChangeV2 v4] + DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:2 Lead:1 LeadEpoch:1 + Messages: + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 +> 1 receiving messages + 4->1 MsgFortifyLeaderResp Term:1 Log:0/0 LeadEpoch:1 + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + DEBUG 1 received MsgAppResp(rejected, hint: (index 2, term 1)) from 4 for index 3 + DEBUG 1 decreased progress of 4 to [StateProbe match=0 next=3 sentCommit=2 matchCommit=2] +> 1 handling Ready + Ready MustSync=false: + Messages: + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/3 Commit:4 Entries:[ + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] + DEBUG 4 [logterm: 0, index: 3] rejected MsgApp [logterm: 1, index: 3] from 1 + 1->4 MsgApp Term:1 Log:1/2 Commit:4 Entries:[ + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + ] +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:4 Lead:1 LeadEpoch:1 + Entries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + 1/5 EntryNormal "" + CommittedEntries: + 1/3 EntryNormal "" + 1/4 EntryConfChangeV2 v4 + Messages: + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + INFO 4 switched to configuration voters=(1 2 3 4) +> 1 receiving messages + 4->1 MsgAppResp Term:1 Log:1/3 Rejected (Hint: 2) Commit:2 + DEBUG 1 received MsgAppResp(rejected, hint: (index 2, term 1)) from 4 for index 3 + 4->1 MsgAppResp Term:1 Log:0/5 Commit:4 + +store-liveness +---- + 1 2 3 4 +1 1 1 1 1 +2 1 1 1 1 +3 x 1 1 1 +4 1 1 1 1 + +print-fortification-state 1 +---- +1 : 1 +2 : 1 +3 : 1 +4 : 1 + +# Try to demote v2 again. This time, the proposal should be allowed. 
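+#
+# With v4 fortified, the lead support computed under the current (1 2 3 4)
+# config has caught up to the maximum lead support reached under the previous
+# (1 2 3) config, so ConfigChangeSafe returns true and validation passes.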
+propose-conf-change 1 +r2 l2 +---- +ok + +stabilize +---- +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->2 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/4 Commit:4 Entries:[1/5 EntryNormal ""] + 1->3 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/5 Commit:4 Entries:[1/6 EntryConfChangeV2 r2 l2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/5 Commit:4 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:4 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:4 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 + INFO 1 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave + INFO initiating automatic transition out of joint configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:5 + 1->2 MsgApp Term:1 Log:1/6 Commit:6 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:5 + 1->3 MsgApp Term:1 Log:1/6 Commit:6 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:5 + 1->4 MsgApp Term:1 Log:1/6 Commit:6 +> 1 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 2 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 3 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 4 handling Ready + Ready 
MustSync=true: + HardState Term:1 Commit:6 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/5 EntryNormal "" + 1/6 EntryConfChangeV2 r2 l2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 + INFO 4 switched to configuration voters=(1 3 4)&&(1 2 3 4) learners_next=(2) autoleave +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 2->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 3->1 MsgAppResp Term:1 Log:0/6 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:5 + 4->1 MsgAppResp Term:1 Log:0/6 Commit:6 +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/6 Commit:6 Entries:[1/7 EntryConfChangeV2] +> 2 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 3 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 4 handling Ready + Ready MustSync=true: + Entries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:6 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:6 +> 1 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 1->2 MsgApp Term:1 Log:1/7 Commit:7 + 1->3 MsgApp Term:1 Log:1/7 Commit:7 + 1->4 MsgApp Term:1 Log:1/7 Commit:7 + INFO 1 switched to configuration voters=(1 3 4) learners=(2) +> 2 receiving messages + 1->2 MsgApp Term:1 Log:1/7 Commit:7 +> 3 receiving messages + 1->3 MsgApp Term:1 Log:1/7 Commit:7 +> 4 receiving messages + 1->4 MsgApp Term:1 Log:1/7 Commit:7 +> 2 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 2 switched to configuration voters=(1 3 4) learners=(2) +> 3 handling Ready + Ready MustSync=true: + HardState Term:1 Vote:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 3 switched to configuration voters=(1 3 4) learners=(2) +> 4 handling Ready + Ready MustSync=true: + HardState Term:1 Commit:7 Lead:1 LeadEpoch:1 + CommittedEntries: + 1/7 EntryConfChangeV2 + Messages: + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 + INFO 4 switched to configuration voters=(1 3 4) learners=(2) +> 1 receiving messages + 2->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 3->1 MsgAppResp Term:1 Log:0/7 Commit:7 + 4->1 MsgAppResp Term:1 Log:0/7 Commit:7 diff --git a/pkg/raft/tracker/fortificationtracker.go b/pkg/raft/tracker/fortificationtracker.go index 61872d3c4233..13755157d97a 100644 --- a/pkg/raft/tracker/fortificationtracker.go +++ b/pkg/raft/tracker/fortificationtracker.go @@ -148,6 +148,23 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta return ft.leaderMaxSupported.Load() } + // Compute the lead support using the current configuration and forward the + // leaderMaxSupported to avoid regressions when the configuration changes. 
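+	// The forwarded leaderMaxSupported is also what ConfigChangeSafe compares
+	// against when deciding whether the lead support under the current
+	// configuration has caught up to the maximum promised under the previous one.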
+	leadSupportUntil := ft.computeLeadSupportUntil(state)
+	return ft.leaderMaxSupported.Forward(leadSupportUntil)
+}
+
+// computeLeadSupportUntil computes the timestamp until which the leader is
+// guaranteed fortification using the current quorum configuration.
+//
+// Unlike LeadSupportUntil, this computation does not provide a guarantee of
+// monotonicity. Specifically, its result may regress after a configuration
+// change.
+func (ft *FortificationTracker) computeLeadSupportUntil(state pb.StateType) hlc.Timestamp {
+	if state != pb.StateLeader {
+		panic("computeLeadSupportUntil should only be called by the leader")
+	}
+
 	// TODO(arul): avoid this map allocation as we're calling LeadSupportUntil
 	// from hot paths.
 	supportExpMap := make(map[pb.PeerID]hlc.Timestamp)
@@ -162,9 +179,7 @@ func (ft *FortificationTracker) LeadSupportUntil(state pb.StateType) hlc.Timesta
 			supportExpMap[id] = curExp
 		}
 	}
-
-	leadSupportUntil := ft.config.Voters.LeadSupportExpiration(supportExpMap)
-	return ft.leaderMaxSupported.Forward(leadSupportUntil)
+	return ft.config.Voters.LeadSupportExpiration(supportExpMap)
 }
 
 // CanDefortify returns whether the caller can safely[1] de-fortify the term
@@ -184,6 +199,61 @@ func (ft *FortificationTracker) CanDefortify() bool {
 	return ft.storeLiveness.SupportExpired(leaderMaxSupported)
 }
 
+// ConfigChangeSafe returns whether it is safe to propose a configuration change
+// or not, given the current state of lead support.
+//
+// If the lead support has not caught up from the previous configuration, we
+// must not propose another configuration change. Doing so would compromise the
+// lead support promise made by the previous configuration and used as an
+// expiration of a leader lease. Instead, we wait for the lead support under the
+// current configuration to catch up to the maximum lead support reached under
+// the previous config. If the lead support is never able to catch up, the
+// leader will eventually step down due to CheckQuorum.
+//
+// The following timeline illustrates the hazard that this check is guarding
+// against:
+//
+// 1. configuration A=(r1, r2, r3), leader=r1
+//    - lead_support=20 (r1=30, r2=20, r3=10)
+//
+// 2. config change #1 adds r4 to the group
+//    - configuration B=(r1, r2, r3, r4)
+//
+// 3. lead support appears to regress to 10
+//    - lead_support=10 (r1=30, r2=20, r3=10, r4=0)
+//
+// 4. any majority quorum for leader election involves r1 or r2
+//    - therefore, max lead support of 20 is “safe”
+//    - this is analogous to how the raft Leader Completeness invariant works
+//      even across config changes, using either (1) single addition/removal
+//      at-a-time changes, or (2) joint consensus. Either way, consecutive
+//      configs share overlapping majorities.
+//
+// 5. config change #2 adds r5 to the group
+//    - configuration C=(r1, r2, r3, r4, r5)
+//
+// 6. lead_support still at 10
+//    - lead_support=10 (r1=30, r2=20, r3=10, r4=0, r5=0)
+//    - however, max lead support of 20 no longer “safe”
+//
+// 7. r3 can win election with support from r4 and r5 before time 20
+//    - neither r1 nor r2 need to be involved
+//    - HAZARD! this could violate the original lead support promise
+//
+// To avoid this hazard, we must wait for the lead support under configuration B
+// to catch up to the maximum lead support reached under configuration A before
+// allowing the proposal of configuration C. This ensures that the overlapping
+// majorities between subsequent configurations preserve the safety of lead
+// support.
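+//
+// As a caller-side illustration, here is a sketch of the stepLeader hook added
+// in this patch (unrelated fields omitted); the leader consults
+// ConfigChangeSafe when validating a proposed conf change:
+//
+//	ccCtx := confchange.ValidationContext{
+//		CurConfig:        &r.config,
+//		Applied:          r.raftLog.applied,
+//		PendingConfIndex: r.pendingConfIndex,
+//		LeadSupportSafe:  r.fortificationTracker.ConfigChangeSafe(),
+//	}
+//	if err := confchange.ValidateProp(ccCtx, cc.AsV2()); err != nil {
+//		// Rejected for now; the conf change can be reproposed once lead
+//		// support has caught up.
+//	}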
+func (ft *FortificationTracker) ConfigChangeSafe() bool { + // A configuration change is only safe if the current configuration's lead + // support has caught up to the maximum lead support reached under the + // previous configuration, which is reflected in leaderMaxSupported. + // + // NB: Only run by the leader. + return ft.leaderMaxSupported.Load().LessEq(ft.computeLeadSupportUntil(pb.StateLeader)) +} + // QuorumActive returns whether the leader is currently supported by a quorum or // not. func (ft *FortificationTracker) QuorumActive() bool { diff --git a/pkg/raft/tracker/fortificationtracker_test.go b/pkg/raft/tracker/fortificationtracker_test.go index efd498f124da..896284e1d4c1 100644 --- a/pkg/raft/tracker/fortificationtracker_test.go +++ b/pkg/raft/tracker/fortificationtracker_test.go @@ -492,6 +492,136 @@ func TestCanDefortify(t *testing.T) { } } +// TestConfigChangeSafe tests whether a leader can safely propose a config +// change. +func TestConfigChangeSafe(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ts := func(ts int64) hlc.Timestamp { + return hlc.Timestamp{ + WallTime: ts, + } + } + + testCases := []struct { + afterConfigChange func(sl *mockStoreLiveness, tracker *FortificationTracker) + expConfigChangeSafe bool + expLeadSupportUntil hlc.Timestamp + }{ + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // Nothing. r4 not providing store liveness support or fortified. + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 providing store liveness support but not fortified. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified at earlier epoch. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 39) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, but support still lagging. + sl.liveness[4] = makeMockLivenessEntry(40, ts(5)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: false, + expLeadSupportUntil: ts(15), // clamped at 15 + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(15)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support caught up. + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 fortified, support beyond previous config. + sl.liveness[2] = makeMockLivenessEntry(20, ts(25)) + sl.liveness[4] = makeMockLivenessEntry(40, ts(25)) + ft.RecordFortification(4, 40) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers caught up. 
+ sl.liveness[1] = makeMockLivenessEntry(10, ts(15)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(15), + }, + { + afterConfigChange: func(sl *mockStoreLiveness, ft *FortificationTracker) { + // r4 not providing store liveness support or fortified. However, + // support from other peers beyond previous config. + sl.liveness[1] = makeMockLivenessEntry(10, ts(20)) + sl.liveness[2] = makeMockLivenessEntry(20, ts(20)) + }, + expConfigChangeSafe: true, + expLeadSupportUntil: ts(20), + }, + } + + for _, tc := range testCases { + mockLiveness := makeMockStoreLiveness( + map[pb.PeerID]mockLivenessEntry{ + 1: makeMockLivenessEntry(10, ts(10)), + 2: makeMockLivenessEntry(20, ts(15)), + 3: makeMockLivenessEntry(30, ts(20)), + }, + ) + + cfg := quorum.MakeEmptyConfig() + for _, id := range []pb.PeerID{1, 2, 3} { + cfg.Voters[0][id] = struct{}{} + } + ft := NewFortificationTracker(&cfg, mockLiveness, raftlogger.DiscardLogger) + + // Fortify the leader before the configuration change. + ft.RecordFortification(1, 10) + ft.RecordFortification(2, 20) + ft.RecordFortification(3, 30) + require.Equal(t, ts(15), ft.LeadSupportUntil(pb.StateLeader)) + + // Perform a configuration change that adds r4 to the voter set. + cfg.Voters[0][4] = struct{}{} + + tc.afterConfigChange(&mockLiveness, ft) + + require.Equal(t, tc.expConfigChangeSafe, ft.ConfigChangeSafe()) + require.Equal(t, tc.expLeadSupportUntil, ft.LeadSupportUntil(pb.StateLeader)) + } +} + type mockLivenessEntry struct { epoch pb.Epoch ts hlc.Timestamp