-
Notifications
You must be signed in to change notification settings - Fork 2.5k
fix(engine): defer changeset static file truncation during reorgs #23291
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
2e7bcc0
fix(engine): defer changeset static file truncation during reorgs
joshieDo f0bdf57
fix: clippy warnings
joshieDo 6bc3dc0
fix: gate deferred changeset prune on storage_v2
joshieDo ce84c87
fix: collapse nested if for clippy
joshieDo d1ecfa7
feat(engine): wait for MDBX readers to drain before applying deferred…
joshieDo 5036cc4
fix(storage): reuse pinned RocksDB snapshot path
joshieDo 3f86ae3
Revert "fix(storage): reuse pinned RocksDB snapshot path"
joshieDo fc12649
docs(engine): clarify deferred changeset prune contracts
joshieDo File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,12 +2,14 @@ use crate::metrics::PersistenceMetrics; | |
| use alloy_eips::BlockNumHash; | ||
| use crossbeam_channel::Sender as CrossbeamSender; | ||
| use reth_chain_state::ExecutedBlock; | ||
| use reth_db::Database; | ||
| use reth_errors::ProviderError; | ||
| use reth_ethereum_primitives::EthPrimitives; | ||
| use reth_primitives_traits::{FastInstant as Instant, NodePrimitives}; | ||
| use reth_provider::{ | ||
| providers::ProviderNodeTypes, BlockExecutionWriter, BlockHashReader, ChainStateBlockWriter, | ||
| DBProvider, DatabaseProviderFactory, ProviderFactory, SaveBlocksMode, | ||
| StaticFileProviderFactory, StaticFileWriter, StorageSettingsCache, | ||
| }; | ||
| use reth_prune::{PrunerError, PrunerWithFactory}; | ||
| use reth_stages_api::{MetricEvent, MetricEventsSender}; | ||
|
|
@@ -32,6 +34,21 @@ pub struct PersistenceResult { | |
| pub commit_duration: Option<Duration>, | ||
| } | ||
|
|
||
| /// A deferred changeset prune from a `RemoveBlocksAbove` operation. | ||
| /// | ||
| /// Changeset static file truncation is deferred to avoid truncating memory-mapped files while | ||
| /// concurrent readers may still hold handles. The prune is applied at the start of the next | ||
| /// `SaveBlocks`, after waiting for all MDBX readers from the reorg era to drain. | ||
| #[derive(Debug)] | ||
| struct DeferredChangesetPrune { | ||
| /// The block number to prune changesets to. | ||
| target_block: u64, | ||
| /// MDBX txn ID recorded after the `RemoveBlocksAbove` commit. All readers with a txn ID | ||
| /// strictly below this value must complete before the prune can be applied safely, as | ||
| /// they hold a pre-unwind MDBX snapshot and may reference stale changeset data. | ||
| committed_txn_id: Option<u64>, | ||
| } | ||
|
|
||
| /// Writes parts of reth's in memory tree state to the database and static files. | ||
| /// | ||
| /// This is meant to be a spawned service that listens for various incoming persistence operations, | ||
|
|
@@ -60,6 +77,12 @@ where | |
| /// Pending safe block number to be committed with the next block save. | ||
| /// This avoids triggering a separate fsync for each safe block update. | ||
| pending_safe_block: Option<u64>, | ||
| /// Deferred changeset prune from a previous `RemoveBlocksAbove` operation. | ||
| /// | ||
| /// During reorgs, changeset static file truncation is deferred to the next `SaveBlocks` | ||
| /// commit to avoid truncating files while concurrent readers (payload builders, RPC) may | ||
| /// still hold stale memory-mapped file handles. | ||
| deferred_changeset_prune: Option<DeferredChangesetPrune>, | ||
| } | ||
|
|
||
| impl<N> PersistenceService<N> | ||
|
|
@@ -81,6 +104,7 @@ where | |
| sync_metrics_tx, | ||
| pending_finalized_block: None, | ||
| pending_safe_block: None, | ||
| deferred_changeset_prune: None, | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -128,7 +152,7 @@ where | |
|
|
||
| #[instrument(level = "debug", target = "engine::persistence", skip_all, fields(%new_tip_num))] | ||
| fn on_remove_blocks_above( | ||
| &self, | ||
| &mut self, | ||
| new_tip_num: u64, | ||
| ) -> Result<Option<BlockNumHash>, PersistenceError> { | ||
| debug!(target: "engine::persistence", ?new_tip_num, "Removing blocks"); | ||
|
|
@@ -137,8 +161,36 @@ where | |
|
|
||
| let new_tip_hash = provider_rw.block_hash(new_tip_num)?; | ||
| provider_rw.remove_block_and_execution_above(new_tip_num)?; | ||
|
|
||
| // Defer changeset static file truncation to the next SaveBlocks commit. | ||
| // | ||
| // `remove_block_and_execution_above` queues prune strategies on the changeset SF | ||
| // writers. If we let commit() execute them now, it would truncate the underlying | ||
| // files while concurrent readers (payload builders, RPC) may still hold stale | ||
| // memory-mapped handles — causing a panic or SIGBUS. | ||
| // | ||
| // By taking the strategies here and re-applying them later in `on_save_blocks`, | ||
| // we ensure the truncation only happens when no reader needs the old data: | ||
| // after the reorg completes and the next batch of blocks is persisted. | ||
| if self.provider.cached_storage_settings().storage_v2 && | ||
| let Some(target) = self.provider.static_file_provider().take_changeset_prunes() | ||
| { | ||
| let prev_target = self.deferred_changeset_prune.as_ref().map(|d| d.target_block); | ||
| self.deferred_changeset_prune = Some(DeferredChangesetPrune { | ||
| target_block: prev_target.map_or(target, |prev| prev.min(target)), | ||
| // txn ID will be set after commit below | ||
| committed_txn_id: None, | ||
| }); | ||
| } | ||
|
|
||
| provider_rw.commit()?; | ||
|
|
||
| // Record the MDBX txn ID after commit so we can later verify all readers from | ||
| // this era have completed before applying the deferred changeset prune. | ||
| if let Some(deferred) = &mut self.deferred_changeset_prune { | ||
| deferred.committed_txn_id = self.provider.db_ref().last_txnid(); | ||
| } | ||
|
|
||
| debug!(target: "engine::persistence", ?new_tip_num, ?new_tip_hash, "Removed blocks from disk"); | ||
| self.metrics.remove_blocks_above_duration_seconds.record(start_time.elapsed()); | ||
| Ok(new_tip_hash.map(|hash| BlockNumHash { hash, number: new_tip_num })) | ||
|
|
@@ -160,6 +212,35 @@ where | |
|
|
||
| let start_time = Instant::now(); | ||
|
|
||
| // Apply any deferred changeset prunes from a previous RemoveBlocksAbove. | ||
| // Wait for all MDBX readers from the reorg era to complete before truncating, | ||
| // so no reader can observe a stale memory-mapped file. | ||
| if let Some(deferred) = self.deferred_changeset_prune.take() { | ||
| if let Some(prune_txn) = deferred.committed_txn_id { | ||
| while self | ||
| .provider | ||
| .db_ref() | ||
| .oldest_reader_txnid() | ||
| .is_some_and(|oldest| oldest < prune_txn) | ||
|
Comment on lines
+223
to
+224
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this perhaps too relaxed? this would also include rpc related transactions? |
||
| { | ||
| debug!( | ||
| target: "engine::persistence", | ||
| target_block = deferred.target_block, | ||
| prune_txn, | ||
| oldest_reader = ?self.provider.db_ref().oldest_reader_txnid(), | ||
| "Waiting for stale readers to drain before changeset prune" | ||
| ); | ||
| std::thread::sleep(std::time::Duration::from_millis(10)); | ||
| } | ||
| } | ||
|
|
||
| debug!(target: "engine::persistence", target_block = deferred.target_block, "Applying deferred changeset prunes"); | ||
| let sf = self.provider.static_file_provider(); | ||
| sf.requeue_changeset_prunes(deferred.target_block)?; | ||
| sf.commit()?; | ||
| debug!(target: "engine::persistence", target_block = deferred.target_block, "Applied deferred changeset prunes"); | ||
| } | ||
|
|
||
| if let Some(last) = last_block { | ||
| let provider_rw = self.provider.database_provider_rw()?; | ||
| provider_rw.save_blocks(blocks, SaveBlocksMode::Full)?; | ||
|
|
@@ -374,13 +455,35 @@ impl Drop for ServiceGuard { | |
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
| use alloy_primitives::B256; | ||
| use alloy_consensus::Header; | ||
| use alloy_primitives::{Address, B256, U256}; | ||
| use reth_chain_state::test_utils::TestBlockBuilder; | ||
| use reth_execution_types::{BlockExecutionOutput, BlockExecutionResult}; | ||
| use reth_exex_types::FinishedExExHeight; | ||
| use reth_provider::test_utils::create_test_provider_factory; | ||
| use reth_primitives_traits::{SealedBlock, SealedHeader}; | ||
| use reth_provider::{ | ||
| test_utils::create_test_provider_factory, ChangeSetReader, HeaderProvider, StorageSettings, | ||
| StorageSettingsCache, | ||
| }; | ||
| use reth_prune::Pruner; | ||
| use reth_revm::db::BundleState; | ||
| use revm_state::AccountInfo; | ||
| use std::sync::Arc; | ||
| use tokio::sync::mpsc::unbounded_channel; | ||
|
|
||
| fn persistence_handle_with_factory( | ||
| provider: ProviderFactory<reth_provider::test_utils::MockNodeTypesWithDB>, | ||
| ) -> PersistenceHandle<EthPrimitives> { | ||
| let (_finished_exex_height_tx, finished_exex_height_rx) = | ||
| tokio::sync::watch::channel(FinishedExExHeight::NoExExs); | ||
|
|
||
| let pruner = | ||
| Pruner::new_with_factory(provider.clone(), vec![], 5, 0, None, finished_exex_height_rx); | ||
|
|
||
| let (sync_metrics_tx, _sync_metrics_rx) = unbounded_channel(); | ||
| PersistenceHandle::<EthPrimitives>::spawn_service(provider, pruner, sync_metrics_tx) | ||
| } | ||
|
|
||
| fn default_persistence_handle() -> PersistenceHandle<EthPrimitives> { | ||
| let provider = create_test_provider_factory(); | ||
|
|
||
|
|
@@ -509,4 +612,132 @@ mod tests { | |
| let expected: Vec<u64> = (15..25).collect(); | ||
| assert_eq!(entries, expected, "new entries 20..25 must survive pruning"); | ||
| } | ||
|
|
||
| /// Verifies that changeset static file truncation is deferred during reorgs. | ||
| /// | ||
| /// Headers are truncated immediately by `RemoveBlocksAbove`, but changeset prunes are | ||
| /// deferred to the next `SaveBlocks` to avoid truncating files while concurrent readers | ||
| /// (payload builders, RPC) may still hold memory-mapped handles. | ||
| #[test] | ||
| fn test_deferred_changeset_prune_on_reorg() { | ||
| let factory = create_test_provider_factory(); | ||
| factory.set_storage_settings_cache(StorageSettings::v2()); | ||
|
|
||
| // -- Phase 1: Persist genesis + blocks 1..3 with account state changes -- | ||
|
|
||
| let genesis = SealedBlock::<reth_ethereum_primitives::Block>::from_sealed_parts( | ||
| SealedHeader::new( | ||
| Header { number: 0, difficulty: U256::from(1), ..Default::default() }, | ||
| B256::ZERO, | ||
| ), | ||
| Default::default(), | ||
| ); | ||
| let genesis_executed = ExecutedBlock::new( | ||
| Arc::new(genesis.try_recover().unwrap()), | ||
| Arc::new(BlockExecutionOutput { | ||
| result: BlockExecutionResult { | ||
| receipts: vec![], | ||
| requests: Default::default(), | ||
| gas_used: 0, | ||
| blob_gas_used: 0, | ||
| }, | ||
| state: Default::default(), | ||
| }), | ||
| Default::default(), | ||
| ); | ||
|
|
||
| let provider_rw = factory.provider_rw().unwrap(); | ||
| provider_rw.save_blocks(vec![genesis_executed], SaveBlocksMode::Full).unwrap(); | ||
| provider_rw.commit().unwrap(); | ||
|
|
||
| // Build blocks 1..3 with account changes so changesets are written to SF | ||
| let mut blocks = Vec::new(); | ||
| let mut parent_hash = B256::ZERO; | ||
| for block_num in 1..=3u64 { | ||
| let address = Address::with_last_byte(block_num as u8); | ||
| let bundle = BundleState::builder(block_num..=block_num) | ||
| .state_present_account_info( | ||
| address, | ||
| AccountInfo { | ||
| nonce: block_num, | ||
| balance: U256::from(block_num * 100), | ||
| ..Default::default() | ||
| }, | ||
| ) | ||
| .revert_account_info(block_num, address, Some(None)) | ||
| .build(); | ||
|
|
||
| let header = Header { | ||
| number: block_num, | ||
| parent_hash, | ||
| difficulty: U256::from(1), | ||
| ..Default::default() | ||
| }; | ||
| let block = SealedBlock::<reth_ethereum_primitives::Block>::seal_parts( | ||
| header, | ||
| Default::default(), | ||
| ); | ||
| parent_hash = block.hash(); | ||
|
|
||
| blocks.push(ExecutedBlock::new( | ||
| Arc::new(block.try_recover().unwrap()), | ||
| Arc::new(BlockExecutionOutput { | ||
| result: BlockExecutionResult { | ||
| receipts: vec![], | ||
| requests: Default::default(), | ||
| gas_used: 0, | ||
| blob_gas_used: 0, | ||
| }, | ||
| state: bundle, | ||
| }), | ||
| Default::default(), | ||
| )); | ||
| } | ||
|
|
||
| let handle = persistence_handle_with_factory(factory.clone()); | ||
| let (tx, rx) = crossbeam_channel::bounded(1); | ||
| handle.save_blocks(blocks, tx).unwrap(); | ||
| rx.recv_timeout(std::time::Duration::from_secs(10)).expect("save_blocks timed out"); | ||
|
|
||
| // -- Phase 2: Simulate builder getting a reader before reorg -- | ||
|
|
||
| let sf = factory.static_file_provider(); | ||
|
|
||
| // Builder reads block 2 changesets — this should work | ||
| let changesets_block2 = sf.account_block_changeset(2).unwrap(); | ||
| assert!(!changesets_block2.is_empty(), "block 2 should have changesets before reorg"); | ||
|
|
||
| // -- Phase 3: Reorg via RemoveBlocksAbove(1) -- | ||
|
|
||
| let (tx, rx) = crossbeam_channel::bounded(1); | ||
| handle.remove_blocks_above(1, tx).unwrap(); | ||
| rx.recv_timeout(std::time::Duration::from_secs(10)).expect("remove_blocks timed out"); | ||
|
|
||
| // -- Phase 4: Headers are truncated immediately, but changesets are deferred -- | ||
|
|
||
| assert!( | ||
| sf.header_by_number(2).unwrap().is_none(), | ||
| "header 2 should be gone after reorg (truncated immediately)" | ||
| ); | ||
|
|
||
| let changesets_after_reorg = sf.account_block_changeset(2).unwrap(); | ||
| assert!( | ||
| !changesets_after_reorg.is_empty(), | ||
| "block 2 changesets should still be readable after reorg (prune deferred)" | ||
| ); | ||
|
|
||
| // -- Phase 5: Next save_blocks applies the deferred prune -- | ||
|
|
||
| let (tx, rx) = crossbeam_channel::bounded(1); | ||
| handle.save_blocks(vec![], tx).unwrap(); | ||
| rx.recv_timeout(std::time::Duration::from_secs(10)).expect("save_blocks timed out"); | ||
|
|
||
| // -- Phase 6: Block 2 changesets should now be gone -- | ||
|
|
||
| let changesets_after_prune = sf.account_block_changeset(2).unwrap(); | ||
| assert!( | ||
| changesets_after_prune.is_empty(), | ||
| "block 2 changesets should be gone after deferred prune applied" | ||
| ); | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lets please not add private types to top of file :)