From 9669909103b66c7da805827c6e9422798f1340c6 Mon Sep 17 00:00:00 2001 From: Gary Malouf <982483+gmalouf@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:17:03 -0500 Subject: [PATCH 1/5] Support more aggressive period 0 deadline timeout. --- agreement/actions.go | 3 +-- agreement/player.go | 12 ++++++++++-- agreement/service_test.go | 32 ++++++++++++++++---------------- agreement/types.go | 15 +++++++++------ config/consensus.go | 9 +++++++-- 5 files changed, 43 insertions(+), 28 deletions(-) diff --git a/agreement/actions.go b/agreement/actions.go index 0e0d5f19c4..ef2dd76c2b 100644 --- a/agreement/actions.go +++ b/agreement/actions.go @@ -232,8 +232,7 @@ type ensureAction struct { Payload proposal // the certificate proving commitment Certificate Certificate - - // The time that the winning proposal-vote was validated, relative to the beginning of the round + // The time that the winning proposal-vote was validated for round credentialRoundLag back from the current one voteValidatedAt time.Duration // The dynamic filter timeout calculated for this round, even if not enabled, for reporting to telemetry. 
dynamicFilterTimeout time.Duration diff --git a/agreement/player.go b/agreement/player.go index 21d65ecf88..a769451e0c 100644 --- a/agreement/player.go +++ b/agreement/player.go @@ -103,7 +103,7 @@ func (p *player) handle(r routerHandle, e event) []action { switch p.Step { case soft: // precondition: nap = false - actions = p.issueSoftVote(r) + actions = p.issueSoftVote(r, e) p.Step = cert // update tracer state to match player r.t.setMetadata(tracerMetadata{p.Round, p.Period, p.Step}) @@ -158,8 +158,16 @@ func (p *player) handleFastTimeout(r routerHandle, e timeoutEvent) []action { return p.issueFastVote(r) } -func (p *player) issueSoftVote(r routerHandle) (actions []action) { +func (p *player) issueSoftVote(r routerHandle, te timeoutEvent) (actions []action) { defer func() { + var deadlineTimeout time.Duration + if te.Proto.Err != nil { + r.t.log.Errorf("failed to read protocol version for timeout event (proto %v): %v. "+ + "Falling Back to default deadline timeout.", te.Proto.Version, te.Proto.Err) + deadlineTimeout = defaultDeadlineTimeout + } else { + deadlineTimeout = DeadlineTimeout(p.Period, te.Proto.Version) + } p.Deadline = Deadline{Duration: deadlineTimeout, Type: TimeoutDeadline} }() diff --git a/agreement/service_test.go b/agreement/service_test.go index f6ff27fd2b..81443d7529 100644 --- a/agreement/service_test.go +++ b/agreement/service_test.go @@ -1334,7 +1334,7 @@ func TestAgreementFastRecoveryDownMiss(t *testing.T) { triggerGlobalTimeout(FilterTimeout(0, version), TimeoutFilter, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) triggerGlobalTimeout(0, TimeoutFastRecovery, clocks, activityMonitor) // activates fast partition recovery timer @@ -1435,7 +1435,7 @@ func TestAgreementFastRecoveryLate(t *testing.T) { } } 
- triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) triggerGlobalTimeout(0, TimeoutFastRecovery, clocks, activityMonitor) // activates fast partition recovery timer @@ -1548,7 +1548,7 @@ func TestAgreementFastRecoveryRedo(t *testing.T) { } } - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) triggerGlobalTimeout(0, TimeoutFastRecovery, clocks, activityMonitor) // activates fast partition recovery timer @@ -1589,7 +1589,7 @@ func TestAgreementFastRecoveryRedo(t *testing.T) { triggerGlobalTimeout(FilterTimeout(1, version), TimeoutFilter, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(1, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) triggerGlobalTimeout(0, TimeoutFastRecovery, clocks, activityMonitor) // activates fast partition recovery timer @@ -1681,7 +1681,7 @@ func TestAgreementBlockReplayBug_b29ea57(t *testing.T) { triggerGlobalTimeout(FilterTimeout(0, version), TimeoutFilter, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -1690,7 +1690,7 @@ func TestAgreementBlockReplayBug_b29ea57(t *testing.T) { triggerGlobalTimeout(FilterTimeout(1, version), TimeoutFilter, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + 
triggerGlobalTimeout(DeadlineTimeout(1, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -1743,7 +1743,7 @@ func TestAgreementLateCertBug(t *testing.T) { closeFn() baseNetwork.repairAll() - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -1819,7 +1819,7 @@ func TestAgreementRecoverGlobalStartingValue(t *testing.T) { } } - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) require.Equal(t, 4, int(zeroes)) } @@ -1846,7 +1846,7 @@ func TestAgreementRecoverGlobalStartingValue(t *testing.T) { } } - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(1, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) require.Equal(t, 5, int(zeroes)) } @@ -1924,7 +1924,7 @@ func TestAgreementRecoverGlobalStartingValueBadProposal(t *testing.T) { } return params }) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) require.Equal(t, 4, int(zeroes)) } @@ -1950,7 +1950,7 @@ func TestAgreementRecoverGlobalStartingValueBadProposal(t *testing.T) { panic(errstr) } } - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(1, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -2025,7 +2025,7 @@ func TestAgreementRecoverBothVAndBotQuorums(t *testing.T) { } // generate a bottom quorum; let only one node see it. 
baseNetwork.crown(0) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) if clocks[0].(*testingClock).zeroes != zeroes+1 { errstr := fmt.Sprintf("node 0 did not enter new period from bot quorum") panic(errstr) @@ -2076,7 +2076,7 @@ func TestAgreementRecoverBothVAndBotQuorums(t *testing.T) { } } - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(1, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -2190,7 +2190,7 @@ func TestAgreementSlowPayloadsPostDeadline(t *testing.T) { { triggerGlobalTimeout(FilterTimeout(0, version), TimeoutFilter, clocks, activityMonitor) zeroes = expectNoNewPeriod(clocks, zeroes) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) } @@ -2251,7 +2251,7 @@ func TestAgreementLargePeriods(t *testing.T) { zeroes = expectNoNewPeriod(clocks, zeroes) baseNetwork.repairAll() - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(period(p), version), TimeoutDeadline, clocks, activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) require.Equal(t, 4+p, int(zeroes)) } @@ -2363,7 +2363,7 @@ func TestAgreementRegression_WrongPeriodPayloadVerificationCancellation_8ba23942 // release proposed blocks in a controlled manner to prevent oversubscription of verification pocket1 := make(chan multicastParams, 100) closeFn = baseNetwork.pocketAllCompound(pocket1) - triggerGlobalTimeout(deadlineTimeout, TimeoutDeadline, clocks, activityMonitor) + triggerGlobalTimeout(DeadlineTimeout(0, version), TimeoutDeadline, clocks, activityMonitor) baseNetwork.repairAll() close(pocket1) { diff --git 
a/agreement/types.go b/agreement/types.go index 664fd3a2b0..ed06fc2af1 100644 --- a/agreement/types.go +++ b/agreement/types.go @@ -50,7 +50,7 @@ type Deadline struct { Type TimeoutType } -var deadlineTimeout = config.Protocol.BigLambda + config.Protocol.SmallLambda +var defaultDeadlineTimeout = config.Protocol.BigLambda + config.Protocol.SmallLambda var partitionStep = next + 3 var recoveryExtraTimeout = config.Protocol.SmallLambda @@ -64,8 +64,11 @@ func FilterTimeout(p period, v protocol.ConsensusVersion) time.Duration { } // DeadlineTimeout is the duration of the second agreement step. -func DeadlineTimeout() time.Duration { - return deadlineTimeout +func DeadlineTimeout(p period, v protocol.ConsensusVersion) time.Duration { + if p == 0 { + return config.Consensus[v].AgreementDeadlineTimeoutPeriod0 + } + return defaultDeadlineTimeout } type ( @@ -93,9 +96,9 @@ const ( ) func (s step) nextVoteRanges() (lower, upper time.Duration) { - extra := recoveryExtraTimeout // eg 2000 ms - lower = deadlineTimeout // eg 17000 ms (15000 + 2000) - upper = lower + extra // eg 19000 ms + extra := recoveryExtraTimeout // eg 2000 ms + lower = defaultDeadlineTimeout // eg 17000 ms (15000 + 2000) + upper = lower + extra // eg 19000 ms for i := next; i < s; i++ { extra *= 2 diff --git a/config/consensus.go b/config/consensus.go index a2f28b97d5..32e3d9dce4 100644 --- a/config/consensus.go +++ b/config/consensus.go @@ -162,6 +162,8 @@ type ConsensusParams struct { // time for nodes to wait for block proposal headers for period = 0, value should be configured to suit best case // critical path AgreementFilterTimeoutPeriod0 time.Duration + // Duration of the second agreement step for period=0, value should be configured to suit best case critical path + AgreementDeadlineTimeoutPeriod0 time.Duration FastRecoveryLambda time.Duration // time between fast recovery attempts @@ -848,8 +850,9 @@ func initConsensusProtocols() { DownCommitteeSize: 10000, DownCommitteeThreshold: 7750, - 
AgreementFilterTimeout: 4 * time.Second, - AgreementFilterTimeoutPeriod0: 4 * time.Second, + AgreementFilterTimeout: 4 * time.Second, + AgreementFilterTimeoutPeriod0: 4 * time.Second, + AgreementDeadlineTimeoutPeriod0: Protocol.BigLambda + Protocol.SmallLambda, FastRecoveryLambda: 5 * time.Minute, @@ -1389,6 +1392,8 @@ func initConsensusProtocols() { vFuture.LogicSigVersion = 10 // When moving this to a release, put a new higher LogicSigVersion here vFuture.EnableLogicSigCostPooling = true + vFuture.AgreementDeadlineTimeoutPeriod0 = 4000 * time.Millisecond + vFuture.StateProofBlockHashInLightHeader = true // Setting DynamicFilterTimeout in vFuture will impact e2e test performance From 6f07933e71b73060a05945108bd594c604e6402f Mon Sep 17 00:00:00 2001 From: Gary Malouf <982483+gmalouf@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:34:31 -0500 Subject: [PATCH 2/5] Rename deadlineTimeout to 'roundTimeEstimate' in catchup/service.go to better reflect the variable's purpose. Fix deadline timeout references that either needed to provide period now or needed the default deadline timeout. --- agreement/types.go | 7 ++++++- catchup/service.go | 16 ++++++++-------- catchup/service_test.go | 20 ++++++++++---------- daemon/algod/api/server/common/handlers.go | 4 ++-- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/agreement/types.go b/agreement/types.go index ed06fc2af1..fb60166478 100644 --- a/agreement/types.go +++ b/agreement/types.go @@ -63,7 +63,7 @@ func FilterTimeout(p period, v protocol.ConsensusVersion) time.Duration { return config.Consensus[v].AgreementFilterTimeout } -// DeadlineTimeout is the duration of the second agreement step. +// DeadlineTimeout is the duration of the second agreement step, varying based on period and consensus version. 
func DeadlineTimeout(p period, v protocol.ConsensusVersion) time.Duration { if p == 0 { return config.Consensus[v].AgreementDeadlineTimeoutPeriod0 @@ -71,6 +71,11 @@ func DeadlineTimeout(p period, v protocol.ConsensusVersion) time.Duration { return defaultDeadlineTimeout } +// DefaultDeadlineTimeout is the default duration of the second agreement step. +func DefaultDeadlineTimeout() time.Duration { + return defaultDeadlineTimeout +} + type ( // round denotes a single round of the agreement protocol round = basics.Round diff --git a/catchup/service.go b/catchup/service.go index bcf204b134..01ff1678ef 100644 --- a/catchup/service.go +++ b/catchup/service.go @@ -91,7 +91,7 @@ type Service struct { net network.GossipNode auth BlockAuthenticator parallelBlocks uint64 - deadlineTimeout time.Duration + roundTimeEstimate time.Duration prevBlockFetchTime time.Time blockValidationPool execpool.BacklogPool @@ -146,7 +146,7 @@ func MakeService(log logging.Logger, config config.Local, net network.GossipNode s.unmatchedPendingCertificates = unmatchedPendingCertificates s.log = log.With("Context", "sync") s.parallelBlocks = config.CatchupParallelBlocks - s.deadlineTimeout = agreement.DeadlineTimeout() + s.roundTimeEstimate = agreement.DefaultDeadlineTimeout() s.blockValidationPool = blockValidationPool s.syncNow = make(chan struct{}, 1) @@ -556,11 +556,11 @@ func (s *Service) pipelinedFetch(seedLookback uint64) { // if ledger is busy, pause for some time to let the fetchAndWrite goroutines to finish fetching in-flight blocks. start := time.Now() - for (s.ledger.IsWritingCatchpointDataFile() || s.ledger.IsBehindCommittingDeltas()) && time.Since(start) < s.deadlineTimeout { + for (s.ledger.IsWritingCatchpointDataFile() || s.ledger.IsBehindCommittingDeltas()) && time.Since(start) < s.roundTimeEstimate { time.Sleep(100 * time.Millisecond) } - // if ledger is still busy after s.deadlineTimeout timeout then abort the current pipelinedFetch invocation. 
+ // if ledger is still busy after s.roundTimeEstimate timeout then abort the current pipelinedFetch invocation. // if we're writing a catchpoint file, stop catching up to reduce the memory pressure. Once we finish writing the file we // could resume with the catchup. @@ -616,7 +616,7 @@ func (s *Service) periodicSync() { s.sync() } stuckInARow := 0 - sleepDuration := s.deadlineTimeout + sleepDuration := s.roundTimeEstimate for { currBlock := s.ledger.LastRound() select { @@ -627,7 +627,7 @@ func (s *Service) periodicSync() { stuckInARow = 0 // go to sleep for a short while, for a random duration. // we want to sleep for a random duration since it would "de-syncronize" us from the ledger advance sync - sleepDuration = time.Duration(crypto.RandUint63()) % s.deadlineTimeout + sleepDuration = time.Duration(crypto.RandUint63()) % s.roundTimeEstimate continue case <-s.syncNow: if s.parallelBlocks == 0 || s.ledger.IsWritingCatchpointDataFile() || s.ledger.IsBehindCommittingDeltas() { @@ -637,8 +637,8 @@ func (s *Service) periodicSync() { s.log.Info("Immediate resync triggered; resyncing") s.sync() case <-time.After(sleepDuration): - if sleepDuration < s.deadlineTimeout || s.cfg.DisableNetworking { - sleepDuration = s.deadlineTimeout + if sleepDuration < s.roundTimeEstimate || s.cfg.DisableNetworking { + sleepDuration = s.roundTimeEstimate continue } // if the catchup is disabled in the config file, just skip it. 
diff --git a/catchup/service_test.go b/catchup/service_test.go index fc0ae38e1d..0c4cb5cc6b 100644 --- a/catchup/service_test.go +++ b/catchup/service_test.go @@ -237,7 +237,7 @@ func TestSyncRound(t *testing.T) { localCfg := config.GetDefaultLocal() s := MakeService(logging.Base(), localCfg, net, local, auth, nil, nil) s.log = &periodicSyncLogger{Logger: logging.Base()} - s.deadlineTimeout = 2 * time.Second + s.roundTimeEstimate = 2 * time.Second // Set disable round success err = s.SetDisableSyncRound(3) @@ -246,14 +246,14 @@ func TestSyncRound(t *testing.T) { s.Start() defer s.Stop() // wait past the initial sync - which is known to fail due to the above "auth" - time.Sleep(s.deadlineTimeout*2 - 200*time.Millisecond) + time.Sleep(s.roundTimeEstimate*2 - 200*time.Millisecond) require.Equal(t, initialLocalRound, local.LastRound()) auth.alter(-1, false) // wait until the catchup is done. Since we've might have missed the sleep window, we need to wait // until the synchronization is complete. 
waitStart := time.Now() - for time.Since(waitStart) < 2*s.deadlineTimeout { + for time.Since(waitStart) < 2*s.roundTimeEstimate { if remote.LastRound() == local.LastRound() { break } @@ -276,7 +276,7 @@ func TestSyncRound(t *testing.T) { s.UnsetDisableSyncRound() // wait until the catchup is done waitStart = time.Now() - for time.Since(waitStart) < 8*s.deadlineTimeout { + for time.Since(waitStart) < 8*s.roundTimeEstimate { if remote.LastRound() == local.LastRound() { break } @@ -326,19 +326,19 @@ func TestPeriodicSync(t *testing.T) { // Make Service s := MakeService(logging.Base(), defaultConfig, net, local, auth, nil, nil) s.log = &periodicSyncLogger{Logger: logging.Base()} - s.deadlineTimeout = 2 * time.Second + s.roundTimeEstimate = 2 * time.Second s.Start() defer s.Stop() // wait past the initial sync - which is known to fail due to the above "auth" - time.Sleep(s.deadlineTimeout*2 - 200*time.Millisecond) + time.Sleep(s.roundTimeEstimate*2 - 200*time.Millisecond) require.Equal(t, initialLocalRound, local.LastRound()) auth.alter(-1, false) // wait until the catchup is done. Since we've might have missed the sleep window, we need to wait // until the synchronization is complete. 
waitStart := time.Now() - for time.Since(waitStart) < 10*s.deadlineTimeout { + for time.Since(waitStart) < 10*s.roundTimeEstimate { if remote.LastRound() == local.LastRound() { break } @@ -717,7 +717,7 @@ func helperTestOnSwitchToUnSupportedProtocol( // Make Service s := MakeService(logging.Base(), config, net, local, &mockedAuthenticator{errorRound: -1}, nil, nil) - s.deadlineTimeout = 2 * time.Second + s.roundTimeEstimate = 2 * time.Second s.Start() defer s.Stop() @@ -1198,7 +1198,7 @@ func TestServiceLedgerUnavailable(t *testing.T) { cfg.CatchupParallelBlocks = 2 s := MakeService(logging.Base(), cfg, net, local, auth, nil, nil) s.log = &periodicSyncLogger{Logger: logging.Base()} - s.deadlineTimeout = 2 * time.Second + s.roundTimeEstimate = 2 * time.Second s.testStart() defer s.Stop() @@ -1245,7 +1245,7 @@ func TestServiceNoBlockForRound(t *testing.T) { s := MakeService(logging.Base(), cfg, net, local, auth, nil, nil) pl := &periodicSyncDebugLogger{periodicSyncLogger: periodicSyncLogger{Logger: logging.Base()}} s.log = pl - s.deadlineTimeout = 1 * time.Second + s.roundTimeEstimate = 1 * time.Second s.testStart() defer s.Stop() diff --git a/daemon/algod/api/server/common/handlers.go b/daemon/algod/api/server/common/handlers.go index 95a1dbcce1..938d3ee102 100644 --- a/daemon/algod/api/server/common/handlers.go +++ b/daemon/algod/api/server/common/handlers.go @@ -121,14 +121,14 @@ func Ready(ctx lib.ReqContext, context echo.Context) { // must satisfy following sub conditions: // 1. the node is not in a fast-catchup stage // 2. the node's time since last round should be [0, deadline), - // while deadline = bigLambda + smallLambda = 17s + // while deadline = agreement.DefaultDeadlineTimeout = 17s // 3. 
the node's catchup time is 0 isReadyFromStat := func(status node.StatusReport) bool { timeSinceLastRound := status.TimeSinceLastRound().Milliseconds() return len(status.Catchpoint) == 0 && timeSinceLastRound >= 0 && - timeSinceLastRound < agreement.DeadlineTimeout().Milliseconds() && + timeSinceLastRound < agreement.DefaultDeadlineTimeout().Milliseconds() && status.CatchupTime.Milliseconds() == 0 } From f7227f78820f5f32aa51210baacd0e9fcdeaeff0 Mon Sep 17 00:00:00 2001 From: Gary <982483+gmalouf@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:31:42 -0500 Subject: [PATCH 3/5] Update config/consensus.go Co-authored-by: John Jannotti --- config/consensus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/consensus.go b/config/consensus.go index 32e3d9dce4..95004e91f9 100644 --- a/config/consensus.go +++ b/config/consensus.go @@ -1392,7 +1392,7 @@ func initConsensusProtocols() { vFuture.LogicSigVersion = 10 // When moving this to a release, put a new higher LogicSigVersion here vFuture.EnableLogicSigCostPooling = true - vFuture.AgreementDeadlineTimeoutPeriod0 = 4000 * time.Millisecond + vFuture.AgreementDeadlineTimeoutPeriod0 = 4 * time.Second vFuture.StateProofBlockHashInLightHeader = true From 0d6614c8c266d5f98228cc9defc8e4033d9842d5 Mon Sep 17 00:00:00 2001 From: Gary Malouf <982483+gmalouf@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:53:35 -0500 Subject: [PATCH 4/5] Add additional condition to consensus version if check in issueSoftVote to account for a possible version zero value.
--- agreement/player.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agreement/player.go b/agreement/player.go index a769451e0c..a31b997667 100644 --- a/agreement/player.go +++ b/agreement/player.go @@ -161,8 +161,8 @@ func (p *player) handleFastTimeout(r routerHandle, e timeoutEvent) []action { func (p *player) issueSoftVote(r routerHandle, te timeoutEvent) (actions []action) { defer func() { var deadlineTimeout time.Duration - if te.Proto.Err != nil { - r.t.log.Errorf("failed to read protocol version for timeout event (proto %v): %v. "+ + if te.Proto.Version == "" || te.Proto.Err != nil { + r.t.log.Errorf("failed to read valid protocol version for timeout event (proto %v): %v. "+ "Falling Back to default deadline timeout.", te.Proto.Version, te.Proto.Err) deadlineTimeout = defaultDeadlineTimeout } else { From 4dd7d5788f740e570983b5cfbe45443a1d3381f1 Mon Sep 17 00:00:00 2001 From: Gary Malouf <982483+gmalouf@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:21:31 -0500 Subject: [PATCH 5/5] Pull deadline timeout resolution out to timeoutevent handling in player.go, applying the resolved timeout to both issueSoftVote and nextVoteRanges. --- agreement/player.go | 31 ++++++++++++++++--------------- agreement/service_test.go | 4 ++-- agreement/types.go | 8 ++++---- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/agreement/player.go b/agreement/player.go index a31b997667..4e7ec685e3 100644 --- a/agreement/player.go +++ b/agreement/player.go @@ -100,10 +100,19 @@ func (p *player) handle(r routerHandle, e event) []action { r.t.logTimeout(*p) } + var deadlineTimeout time.Duration + if e.Proto.Version == "" || e.Proto.Err != nil { + r.t.log.Errorf("failed to read valid protocol version for timeout event (proto %v): %v. 
"+ + "Falling Back to default deadline timeout.", e.Proto.Version, e.Proto.Err) + deadlineTimeout = DefaultDeadlineTimeout() + } else { + deadlineTimeout = DeadlineTimeout(p.Period, e.Proto.Version) + } + switch p.Step { case soft: // precondition: nap = false - actions = p.issueSoftVote(r, e) + actions = p.issueSoftVote(r, deadlineTimeout) p.Step = cert // update tracer state to match player r.t.setMetadata(tracerMetadata{p.Round, p.Period, p.Step}) @@ -113,16 +122,16 @@ func (p *player) handle(r routerHandle, e event) []action { p.Step = next // update tracer state to match player r.t.setMetadata(tracerMetadata{p.Round, p.Period, p.Step}) - return p.issueNextVote(r) + return p.issueNextVote(r, deadlineTimeout) default: if p.Napping { - return p.issueNextVote(r) // sets p.Napping to false + return p.issueNextVote(r, deadlineTimeout) // sets p.Napping to false } // not napping, so we should enter a new step p.Step++ // note: this must happen before next timeout setting. // TODO add unit test to ensure that deadlines increase monotonically here - lower, upper := p.Step.nextVoteRanges() + lower, upper := p.Step.nextVoteRanges(deadlineTimeout) delta := time.Duration(e.RandomEntropy % uint64(upper-lower)) p.Napping = true @@ -158,16 +167,8 @@ func (p *player) handleFastTimeout(r routerHandle, e timeoutEvent) []action { return p.issueFastVote(r) } -func (p *player) issueSoftVote(r routerHandle, te timeoutEvent) (actions []action) { +func (p *player) issueSoftVote(r routerHandle, deadlineTimeout time.Duration) (actions []action) { defer func() { - var deadlineTimeout time.Duration - if te.Proto.Version == "" || te.Proto.Err != nil { - r.t.log.Errorf("failed to read valid protocol version for timeout event (proto %v): %v. 
"+ - "Falling Back to default deadline timeout.", te.Proto.Version, te.Proto.Err) - deadlineTimeout = defaultDeadlineTimeout - } else { - deadlineTimeout = DeadlineTimeout(p.Period, te.Proto.Version) - } p.Deadline = Deadline{Duration: deadlineTimeout, Type: TimeoutDeadline} }() @@ -210,7 +211,7 @@ func (p *player) issueCertVote(r routerHandle, e committableEvent) action { return pseudonodeAction{T: attest, Round: p.Round, Period: p.Period, Step: cert, Proposal: e.Proposal} } -func (p *player) issueNextVote(r routerHandle) []action { +func (p *player) issueNextVote(r routerHandle, deadlineTimeout time.Duration) []action { actions := p.partitionPolicy(r) a := pseudonodeAction{T: attest, Round: p.Round, Period: p.Period, Step: p.Step, Proposal: bottom} @@ -234,7 +235,7 @@ func (p *player) issueNextVote(r routerHandle) []action { r.t.timeR().RecStep(p.Period, p.Step, a.Proposal) - _, upper := p.Step.nextVoteRanges() + _, upper := p.Step.nextVoteRanges(deadlineTimeout) p.Napping = false p.Deadline = Deadline{Duration: upper, Type: TimeoutDeadline} return actions diff --git a/agreement/service_test.go b/agreement/service_test.go index 81443d7529..bea5eda8da 100644 --- a/agreement/service_test.go +++ b/agreement/service_test.go @@ -2043,11 +2043,11 @@ func TestAgreementRecoverBothVAndBotQuorums(t *testing.T) { activityMonitor.waitForQuiet() // actually create the value quorum - _, upper := (next).nextVoteRanges() + _, upper := (next).nextVoteRanges(DeadlineTimeout(0, version)) triggerGlobalTimeout(upper, TimeoutDeadline, clocks[1:], activityMonitor) // activates next timers zeroes = expectNoNewPeriod(clocks[1:], zeroes) - lower, upper := (next + 1).nextVoteRanges() + lower, upper := (next + 1).nextVoteRanges(DeadlineTimeout(0, version)) delta := time.Duration(testingRand{}.Uint64() % uint64(upper-lower)) triggerGlobalTimeout(lower+delta, TimeoutDeadline, clocks[1:], activityMonitor) zeroes = expectNewPeriod(clocks, zeroes) diff --git a/agreement/types.go 
b/agreement/types.go index fb60166478..000f03e1ea 100644 --- a/agreement/types.go +++ b/agreement/types.go @@ -100,10 +100,10 @@ const ( down ) -func (s step) nextVoteRanges() (lower, upper time.Duration) { - extra := recoveryExtraTimeout // eg 2000 ms - lower = defaultDeadlineTimeout // eg 17000 ms (15000 + 2000) - upper = lower + extra // eg 19000 ms +func (s step) nextVoteRanges(deadlineTimeout time.Duration) (lower, upper time.Duration) { + extra := recoveryExtraTimeout // eg 2000 ms + lower = deadlineTimeout // based on types.DeadlineTimeout() + upper = lower + extra for i := next; i < s; i++ { extra *= 2