From 15f8661b468555b77bd7c6717f4d5384ab3baee8 Mon Sep 17 00:00:00 2001 From: Pavel Zbitskiy Date: Mon, 10 Jun 2024 14:08:42 -0400 Subject: [PATCH 1/2] tests: fix possible deadlock on a test failure --- ledger/ledger_test.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/ledger/ledger_test.go b/ledger/ledger_test.go index 127ecf85cb..6a092185c8 100644 --- a/ledger/ledger_test.go +++ b/ledger/ledger_test.go @@ -2932,13 +2932,20 @@ func testVotersReloadFromDiskAfterOneStateProofCommitted(t *testing.T, cfg confi } triggerDeleteVoters(t, l, genesisInitState) - l.acctsOnline.voters.votersMu.Lock() - vtSnapshot := l.acctsOnline.voters.votersForRoundCache - // verifying that the tree for round 512 is still in the cache, but the tree for round 256 is evicted. - require.Contains(t, vtSnapshot, basics.Round(496)) - require.NotContains(t, vtSnapshot, basics.Round(240)) - l.acctsOnline.voters.votersMu.Unlock() + var vtSnapshot map[basics.Round]*ledgercore.VotersForRound + func() { + // grab internal lock in order to access the voters tracker + // since the assert below might fail, use a nested scope to ensure the lock is released + l.acctsOnline.voters.votersMu.Lock() + defer l.acctsOnline.voters.votersMu.Unlock() + + vtSnapshot = l.acctsOnline.voters.votersForRoundCache + + // verifying that the tree for round 512 is still in the cache, but the tree for round 256 is evicted. + require.Contains(t, vtSnapshot, basics.Round(496)) + require.NotContains(t, vtSnapshot, basics.Round(240)) + }() err = l.reloadLedger() require.NoError(t, err) From 9d9e8d604733c7810ae22c6d54b0c3cc2d3dcec7 Mon Sep 17 00:00:00 2001 From: Pavel Zbitskiy Date: Mon, 10 Jun 2024 18:19:32 -0400 Subject: [PATCH 2/2] Fix TestVotersReloadFromDiskAfterOneStateProofCommitted --- ledger/ledger_test.go | 13 ++++++++++++- ledger/tracker.go | 3 +++ ledger/tracker_test.go | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/ledger/ledger_test.go b/ledger/ledger_test.go index 6a092185c8..2a9666688b 100644 --- a/ledger/ledger_test.go +++ b/ledger/ledger_test.go @@ -2931,7 +2931,17 @@ func testVotersReloadFromDiskAfterOneStateProofCommitted(t *testing.T, cfg confi require.NoError(t, err) } - triggerDeleteVoters(t, l, genesisInitState) + // wait all pending commits to finish + l.trackers.accountsWriting.Wait() + + // quit the commitSyncer goroutine + l.trackers.ctxCancel() + l.trackers.ctxCancel = nil + <-l.trackers.commitSyncerClosed + l.trackers.commitSyncerClosed = nil + + // flush one final time + triggerTrackerFlush(t, l) var vtSnapshot map[basics.Round]*ledgercore.VotersForRound func() { @@ -2960,6 +2970,7 @@ func TestVotersReloadFromDiskAfterOneStateProofCommitted(t *testing.T) { cfg := config.GetDefaultLocal() cfg.Archival = false cfg.MaxAcctLookback = proto.StateProofInterval - proto.StateProofVotersLookback - 10 + cfg.CatchpointInterval = 0 // no need catchpoint for this test ledgertesting.WithAndWithoutLRUCache(t, cfg, testVotersReloadFromDiskAfterOneStateProofCommitted) } diff --git a/ledger/tracker.go b/ledger/tracker.go index d99f997ebe..97098a572f 100644 --- a/ledger/tracker.go +++ b/ledger/tracker.go @@ -547,6 +547,8 @@ func (tr *trackerRegistry) commitRound(dcc *deferredCommitContext) error { offset := dcc.offset dbRound := dcc.oldBase + tr.log.Debugf("commitRound called for (%d-%d)", dbRound, dbRound+basics.Round(offset)) + // we can exit right away, as this is the result of mis-ordered call to committedUpTo. if tr.dbRound < dbRound || offset < uint64(tr.dbRound-dbRound) { tr.log.Warnf("out of order deferred commit: offset %d, dbRound %d but current tracker DB round is %d", offset, dbRound, tr.dbRound) @@ -574,6 +576,7 @@ func (tr *trackerRegistry) commitRound(dcc *deferredCommitContext) error { dcc.offset = offset dcc.oldBase = dbRound dcc.flushTime = time.Now() + tr.log.Debugf("commitRound advancing tracker db snapshot (%d-%d)", dbRound, dbRound+basics.Round(offset)) var err error for _, lt := range tr.trackers { diff --git a/ledger/tracker_test.go b/ledger/tracker_test.go index 1c8a99ff41..9c26223c39 100644 --- a/ledger/tracker_test.go +++ b/ledger/tracker_test.go @@ -259,7 +259,7 @@ func (st *commitRoundStallingTracker) commitRound(context.Context, trackerdb.Tra // 3. Set a block in prepareCommit, and initiate the commit // 4. Set a block in produceCommittingTask, add a new block and resume the commit // 5. Resume produceCommittingTask -// 6. The data race and panic happens in block queue syncher thread +// 6. The data race and panic happens in block queue syncer thread func TestTrackers_DbRoundDataRace(t *testing.T) { partitiontest.PartitionTest(t)