From 436dbd76989cb5096f50fb6b0e3f03fe2708b081 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 06:35:27 +0000 Subject: [PATCH 01/52] feat(storage): introduce storage proof worker pool - Added configuration for maximum and minimum storage proof workers. - Implemented a worker pool for processing storage proof tasks, improving efficiency by reusing transactions. - Updated `ProofTaskManager` to handle storage proof tasks via a dedicated channel. - Enhanced metrics to track storage proof requests and fallback scenarios. - Adjusted existing tests to accommodate the new storage worker functionality. --- Cargo.lock | 1 + crates/engine/primitives/src/config.rs | 42 ++ .../tree/src/tree/payload_processor/mod.rs | 2 + .../src/tree/payload_processor/multiproof.rs | 1 + crates/trie/parallel/Cargo.toml | 1 + crates/trie/parallel/src/proof.rs | 2 +- crates/trie/parallel/src/proof_task.rs | 627 ++++++++++++++++-- .../trie/parallel/src/proof_task_metrics.rs | 12 + 8 files changed, 629 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8350347b6b4..fde6f2dc3aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10739,6 +10739,7 @@ dependencies = [ "alloy-primitives", "alloy-rlp", "codspeed-criterion-compat", + "crossbeam-channel", "dashmap 6.1.0", "derive_more", "itertools 0.14.0", diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index e5f58523d03..34cffbec2b8 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -20,6 +20,15 @@ pub const DEFAULT_RESERVED_CPU_CORES: usize = 1; /// Default maximum concurrency for prewarm task. 
 pub const DEFAULT_PREWARM_MAX_CONCURRENCY: usize = 16;
 
+/// Maximum number of storage proof workers
+const MAX_STORAGE_PROOF_WORKERS: usize = 12;
+
+/// Minimum number of storage proof workers
+const MIN_STORAGE_PROOF_WORKERS: usize = 2;
+
+/// Default ratio of storage proof workers to max_proof_task_concurrency
+const DEFAULT_STORAGE_PROOF_WORKER_RATIO: f32 = 0.5;
+
 const DEFAULT_BLOCK_BUFFER_LIMIT: u32 = 256;
 const DEFAULT_MAX_INVALID_HEADER_CACHE_LENGTH: u32 = 256;
 const DEFAULT_MAX_EXECUTE_BLOCK_BATCH_SIZE: usize = 4;
@@ -109,6 +118,9 @@ pub struct TreeConfig {
     prewarm_max_concurrency: usize,
     /// Whether to unwind canonical header to ancestor during forkchoice updates.
     allow_unwind_canonical_header: bool,
+    /// Number of dedicated storage proof workers.
+    /// If None, defaults to half of max_proof_task_concurrency.
+    storage_proof_workers: Option<usize>,
 }
 
 impl Default for TreeConfig {
@@ -135,6 +147,7 @@ impl Default for TreeConfig {
             always_process_payload_attributes_on_canonical_head: false,
             prewarm_max_concurrency: DEFAULT_PREWARM_MAX_CONCURRENCY,
             allow_unwind_canonical_header: false,
+            storage_proof_workers: None,
         }
     }
 }
@@ -164,6 +177,7 @@ impl TreeConfig {
         always_process_payload_attributes_on_canonical_head: bool,
         prewarm_max_concurrency: usize,
         allow_unwind_canonical_header: bool,
+        storage_proof_workers: Option<usize>,
     ) -> Self {
         Self {
             persistence_threshold,
@@ -187,6 +201,7 @@ impl TreeConfig {
             always_process_payload_attributes_on_canonical_head,
             prewarm_max_concurrency,
             allow_unwind_canonical_header,
+            storage_proof_workers,
         }
     }
 
@@ -452,4 +467,31 @@ impl TreeConfig {
     pub const fn prewarm_max_concurrency(&self) -> usize {
         self.prewarm_max_concurrency
     }
+
+    /// Get the number of storage proof workers.
+    ///
+    /// Defaults to half of max_proof_task_concurrency, clamped to valid range.
+ pub fn storage_proof_workers(&self) -> usize { + self.storage_proof_workers.unwrap_or_else(|| { + let derived = (self.max_proof_task_concurrency as f32 * + DEFAULT_STORAGE_PROOF_WORKER_RATIO) as usize; + derived.clamp(MIN_STORAGE_PROOF_WORKERS, MAX_STORAGE_PROOF_WORKERS) + }) + } + + /// Set the number of storage proof workers explicitly. + /// + /// Value is clamped to [MIN_STORAGE_PROOF_WORKERS, MAX_STORAGE_PROOF_WORKERS]. + pub const fn with_storage_proof_workers(mut self, workers: usize) -> Self { + // Note: Can't use clamp in const fn, so we'll do manual clamping + let clamped = if workers < MIN_STORAGE_PROOF_WORKERS { + MIN_STORAGE_PROOF_WORKERS + } else if workers > MAX_STORAGE_PROOF_WORKERS { + MAX_STORAGE_PROOF_WORKERS + } else { + workers + }; + self.storage_proof_workers = Some(clamped); + self + } } diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 8d9bd1ba2e0..8aa1f0b4bfe 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -196,11 +196,13 @@ where state_root_config.prefix_sets.clone(), ); let max_proof_task_concurrency = config.max_proof_task_concurrency() as usize; + let storage_worker_count = config.storage_proof_workers(); let proof_task = ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), task_ctx, max_proof_task_concurrency, + storage_worker_count, ); // We set it to half of the proof task concurrency, because often for each multiproof we diff --git a/crates/engine/tree/src/tree/payload_processor/multiproof.rs b/crates/engine/tree/src/tree/payload_processor/multiproof.rs index 6c7f5de40a3..e435e914622 100644 --- a/crates/engine/tree/src/tree/payload_processor/multiproof.rs +++ b/crates/engine/tree/src/tree/payload_processor/multiproof.rs @@ -1236,6 +1236,7 @@ mod tests { config.consistent_view.clone(), task_ctx, 1, + 1, // storage_worker_count: 1 
for tests ); let channel = channel(); diff --git a/crates/trie/parallel/Cargo.toml b/crates/trie/parallel/Cargo.toml index c9f625a1500..b4463d9ede3 100644 --- a/crates/trie/parallel/Cargo.toml +++ b/crates/trie/parallel/Cargo.toml @@ -36,6 +36,7 @@ derive_more.workspace = true rayon.workspace = true itertools.workspace = true tokio = { workspace = true, features = ["rt-multi-thread"] } +crossbeam-channel.workspace = true # `metrics` feature reth-metrics = { workspace = true, optional = true } diff --git a/crates/trie/parallel/src/proof.rs b/crates/trie/parallel/src/proof.rs index d6e1b57ed9b..4c9a3a57cf8 100644 --- a/crates/trie/parallel/src/proof.rs +++ b/crates/trie/parallel/src/proof.rs @@ -448,7 +448,7 @@ mod tests { let task_ctx = ProofTaskCtx::new(Default::default(), Default::default(), Default::default()); let proof_task = - ProofTaskManager::new(rt.handle().clone(), consistent_view.clone(), task_ctx, 1); + ProofTaskManager::new(rt.handle().clone(), consistent_view.clone(), task_ctx, 1, 1); let proof_task_handle = proof_task.handle(); // keep the join handle around to make sure it does not return any errors diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 9bb96d4b19e..4f4f0e746df 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -10,6 +10,7 @@ use crate::root::ParallelStateRootError; use alloy_primitives::{map::B256Set, B256}; +use crossbeam_channel::{bounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender}; use reth_db_api::transaction::DbTx; use reth_execution_errors::SparseTrieError; use reth_provider::{ @@ -48,68 +49,403 @@ use crate::proof_task_metrics::ProofTaskMetrics; type StorageProofResult = Result; type TrieNodeProviderResult = Result, SparseTrieError>; -/// A task that manages sending multiproof requests to a number of tasks that have longer-running -/// database transactions +/// Internal message for storage proof workers. 
+/// +/// This is NOT exposed publicly. External callers still use `ProofTaskKind::StorageProof` +/// which is routed through the manager's std::mpsc channel. +#[derive(Debug)] +struct StorageProofJob { + /// Storage proof input parameters + input: StorageProofInput, + /// Channel to send result back to original caller + /// + /// This is the same std::mpsc::Sender that the external caller provided in + /// ProofTaskKind::StorageProof(input, sender). + result_sender: Sender, +} + +/// Internal message for on-demand task execution. +/// +/// These tasks are executed with lazily-created transactions that are +/// returned to the pool after use (same as current behavior). +#[derive(Debug)] +enum OnDemandTask { + /// Fetch a blinded account node by path + BlindedAccountNode(Nibbles, Sender), + /// Fetch a blinded storage node by account and path + BlindedStorageNode(B256, Nibbles, Sender), +} + +/// A task that manages sending proof requests to worker pools and on-demand tasks. +/// +/// # Architecture (PR1: Storage Workers Only) +/// +/// This manager maintains two execution paths: +/// +/// 1. **Storage Worker Pool** (NEW): +/// - Pre-spawned workers with dedicated long-lived transactions +/// - Tasks queued via crossbeam bounded channel +/// - Workers continuously process without transaction return overhead +/// +/// 2. **On-Demand Execution** (EXISTING): +/// - Lazy transaction creation for blinded node fetches +/// - Transactions returned to pool after use (original behavior) +/// - Same message-passing mechanism as before +/// +/// # External API +/// +/// The external API via `ProofTaskManagerHandle` is COMPLETELY UNCHANGED: +/// - `queue_task(ProofTaskKind)` signature identical +/// - Same std::mpsc message passing +/// - Same return types and error handling +/// +/// All changes are internal routing optimizations. 
 #[derive(Debug)]
 pub struct ProofTaskManager<Factory: DatabaseProviderFactory> {
-    /// Max number of database transactions to create
-    max_concurrency: usize,
-    /// Number of database transactions created
-    total_transactions: usize,
-    /// Consistent view provider used for creating transactions on-demand
+    // ==================== STORAGE WORKER POOL (NEW) ====================
+    /// Sender for storage proof tasks to worker pool.
+    ///
+    /// Queue capacity = storage_worker_count * 2 (for 2x buffering)
+    storage_work_tx: CrossbeamSender<StorageProofJob>,
+
+    /// Number of storage workers successfully spawned.
+    ///
+    /// May be less than requested if transaction creation fails.
+    storage_worker_count: usize,
+
+    // ==================== ON-DEMAND TRANSACTION POOL (REFACTORED) ====================
+    /// Maximum number of on-demand transactions for blinded node fetches.
+    ///
+    /// Calculated as: max_concurrency - storage_worker_count
+    max_on_demand_txs: usize,
+
+    /// Currently available on-demand transactions (reused after return).
+    ///
+    /// Same lifecycle as before PR1.
+    on_demand_txs: Vec<ProofTaskTx<FactoryTx<Factory>>>,
+
+    /// Total on-demand transactions created (for ID assignment).
+    on_demand_tx_count: usize,
+
+    /// Queue of pending on-demand tasks waiting for available transaction.
+    ///
+    /// Replaces the old `pending_tasks` VecDeque which held all task types.
+    /// TODO: Change to VecDeque<OnDemandTask> in Phase 8 when implementing proper task routing
+    pending_on_demand: VecDeque<ProofTaskKind>,
+
+    // ==================== SHARED RESOURCES ====================
+    /// Consistent view provider used for creating transactions on-demand.
     view: ConsistentDbView<Factory>,
-    /// Proof task context shared across all proof tasks
+
+    /// Proof task context shared across all proof tasks.
     task_ctx: ProofTaskCtx,
-    /// Proof tasks pending execution
-    pending_tasks: VecDeque<ProofTaskKind>,
-    /// The underlying handle from which to spawn proof tasks
+
+    /// The underlying handle from which to spawn proof tasks.
executor: Handle, - /// The proof task transactions, containing owned cursor factories that are reused for proof - /// calculation. - proof_task_txs: Vec>>, - /// A receiver for new proof tasks. + + /// A receiver for new proof task messages from external callers. + /// + /// This is the std::mpsc channel connected to ProofTaskManagerHandle. + /// UNCHANGED - maintains interface compatibility. proof_task_rx: Receiver>>, - /// A sender for sending back transactions. + + /// A sender for internal messaging (transaction returns). + /// + /// Used by on-demand tasks to return transactions to pool. tx_sender: Sender>>, + /// The number of active handles. /// /// Incremented in [`ProofTaskManagerHandle::new`] and decremented in /// [`ProofTaskManagerHandle::drop`]. active_handles: Arc, - /// Metrics tracking blinded node fetches. + + /// Metrics tracking proof task operations. #[cfg(feature = "metrics")] metrics: ProofTaskMetrics, } -impl ProofTaskManager { - /// Creates a new [`ProofTaskManager`] with the given max concurrency, creating that number of - /// cursor factories. +/// Worker loop for storage proof computation. +/// +/// # Lifecycle +/// +/// Each worker: +/// 1. Receives `StorageProofJob` from crossbeam bounded channel +/// 2. Computes proof using its dedicated long-lived transaction +/// 3. Sends result directly to original caller via std::mpsc +/// 4. 
Repeats until channel closes (graceful shutdown) +/// +/// # Transaction Reuse +/// +/// The key optimization: the worker reuses the same `proof_tx` across ALL proofs, +/// avoiding the overhead of: +/// - Creating new database transactions +/// - Setting up cursor factories +/// - Returning transactions to a pool +/// +/// # Panic Safety +/// +/// If this function panics, the worker thread terminates but: +/// - Other workers continue operating +/// - The manager detects disconnection when trying to send +/// - System degrades gracefully rather than failing completely +/// +/// # Shutdown +/// +/// Worker shuts down when: +/// - Crossbeam channel closes (all senders dropped) +/// - `ProofTaskManager::run()` drops `storage_work_tx` on terminate +fn storage_worker_loop( + proof_tx: ProofTaskTx, + work_rx: CrossbeamReceiver, + worker_id: usize, +) where + Tx: DbTx, +{ + tracing::debug!( + target: "trie::proof_task", + worker_id, + "Storage proof worker started" + ); + + let mut proofs_processed = 0u64; + let start_time = Instant::now(); + + // Main worker loop: process jobs until channel closes + while let Ok(StorageProofJob { input, result_sender }) = work_rx.recv() { + let proof_start = Instant::now(); + + trace!( + target: "trie::proof_task", + worker_id, + hashed_address = ?input.hashed_address, + prefix_set_len = input.prefix_set.len(), + target_slots = input.target_slots.len(), + "Processing storage proof" + ); + + // ==================== CORE COMPUTATION ==================== + // Compute storage proof using reused transaction + // This is the key difference from on-demand execution: + // - No transaction creation overhead + // - No transaction return message + // - Cursor factories reused across proofs + let result = proof_tx.compute_storage_proof(&input); + + let proof_elapsed = proof_start.elapsed(); + proofs_processed += 1; + + // ==================== RESULT DELIVERY ==================== + // Send result directly to original caller's std::mpsc::Receiver + 
// If receiver is dropped (caller cancelled), log and continue + if let Err(_) = result_sender.send(result) { + tracing::debug!( + target: "trie::proof_task", + worker_id, + hashed_address = ?input.hashed_address, + proofs_processed, + "Storage proof receiver dropped, discarding result" + ); + } + + trace!( + target: "trie::proof_task", + worker_id, + hashed_address = ?input.hashed_address, + proof_time_us = proof_elapsed.as_micros(), + total_processed = proofs_processed, + "Storage proof completed" + ); + } + + // Channel closed - graceful shutdown + let total_elapsed = start_time.elapsed(); + + tracing::info!( + target: "trie::proof_task", + worker_id, + proofs_processed, + uptime_secs = total_elapsed.as_secs(), + avg_proof_time_ms = if proofs_processed > 0 { + total_elapsed.as_millis() / proofs_processed as u128 + } else { + 0 + }, + "Storage proof worker shutting down" + ); +} + +impl ProofTaskManager +where + Factory: DatabaseProviderFactory, +{ + /// Creates a new [`ProofTaskManager`] with the given configuration. + /// + /// # Arguments + /// + /// * `executor` - Tokio runtime handle for spawning workers and tasks + /// * `view` - Consistent database view for creating read-only transactions + /// * `task_ctx` - Shared context (trie updates, hashed state, prefix sets) + /// * `max_concurrency` - Total transaction budget across all execution paths + /// * `storage_worker_count` - Number of storage proof workers to pre-spawn + /// + /// # Transaction Budget Allocation + /// + /// The total `max_concurrency` is split between two pools: + /// + /// 1. **Storage Workers**: `storage_worker_count` transactions (pre-allocated) + /// 2. 
**On-Demand Pool**: `max_concurrency - storage_worker_count` (lazy) + /// + /// Example: + /// ```text + /// max_concurrency = 8, storage_worker_count = 4 + /// → 4 storage workers (pre-spawned) + /// → 4 on-demand transactions (created lazily for blinded nodes) + /// Total: 8 transactions max (same capacity as before) + /// ``` /// - /// Returns an error if the consistent view provider fails to create a read-only transaction. + /// # Worker Spawn Resilience + /// + /// If some workers fail to spawn (e.g., transaction creation error): + /// - Failed workers are logged and skipped + /// - On-demand pool is adjusted: `max_concurrency - actual_spawned_workers` + /// - System continues with fewer workers rather than failing entirely + /// + /// # Panics + /// + /// Does not panic. All errors are logged and handled gracefully. pub fn new( executor: Handle, view: ConsistentDbView, task_ctx: ProofTaskCtx, max_concurrency: usize, + storage_worker_count: usize, ) -> Self { + // Create message channel for external callers (UNCHANGED) let (tx_sender, proof_task_rx) = channel(); - Self { + + // ==================== STORAGE WORKER POOL SETUP ==================== + + // Queue capacity: 2x buffering to reduce contention + // If workers = 4, queue holds 8 tasks maximum + let queue_capacity = storage_worker_count.saturating_mul(2).max(1); + let (storage_work_tx, storage_work_rx) = bounded::(queue_capacity); + + tracing::info!( + target: "trie::proof_task", + storage_worker_count, + queue_capacity, max_concurrency, - total_transactions: 0, + "Initializing storage proof worker pool" + ); + + // Spawn storage workers - each gets its own long-lived transaction + let mut spawned_workers = 0; + for worker_id in 0..storage_worker_count { + // Try to create transaction for this worker + match view.provider_ro() { + Ok(provider_ro) => { + let tx = provider_ro.into_tx(); + let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); + let work_rx = storage_work_rx.clone(); + + // 
Spawn worker on tokio blocking pool + executor.spawn_blocking(move || { + storage_worker_loop(proof_task_tx, work_rx, worker_id) + }); + + spawned_workers += 1; + + tracing::debug!( + target: "trie::proof_task", + worker_id, + spawned_workers, + "Storage worker spawned successfully" + ); + } + Err(err) => { + // Non-fatal: log and continue with fewer workers + tracing::warn!( + target: "trie::proof_task", + worker_id, + ?err, + requested = storage_worker_count, + spawned_workers, + "Failed to create transaction for storage worker, continuing with fewer workers" + ); + } + } + } + + // Verify we spawned at least some workers + if spawned_workers == 0 { + tracing::error!( + target: "trie::proof_task", + requested = storage_worker_count, + "Failed to spawn any storage workers - all will use on-demand pool" + ); + } else if spawned_workers < storage_worker_count { + tracing::warn!( + target: "trie::proof_task", + requested = storage_worker_count, + spawned = spawned_workers, + "Spawned fewer storage workers than requested" + ); + } else { + tracing::info!( + target: "trie::proof_task", + spawned_workers, + queue_capacity, + "Storage worker pool initialized successfully" + ); + } + + // ==================== ON-DEMAND POOL SETUP ==================== + + // Calculate on-demand budget: remaining capacity after storage workers + // Ensure at least 1 on-demand transaction even if storage workers consume all budget + let max_on_demand_txs = max_concurrency.saturating_sub(spawned_workers).max(1); + + tracing::debug!( + target: "trie::proof_task", + max_on_demand_txs, + storage_workers = spawned_workers, + total_capacity = max_concurrency, + "Configured on-demand transaction pool for blinded nodes" + ); + + // ==================== CONSTRUCT MANAGER ==================== + + Self { + // Storage worker pool + storage_work_tx, + storage_worker_count: spawned_workers, + + // On-demand pool + max_on_demand_txs, + on_demand_txs: Vec::with_capacity(max_on_demand_txs), + 
on_demand_tx_count: 0, + pending_on_demand: VecDeque::new(), + + // Shared resources view, task_ctx, - pending_tasks: VecDeque::new(), executor, - proof_task_txs: Vec::new(), proof_task_rx, tx_sender, active_handles: Arc::new(AtomicUsize::new(0)), + #[cfg(feature = "metrics")] metrics: ProofTaskMetrics::default(), } } - /// Returns a handle for sending new proof tasks to the [`ProofTaskManager`]. + /// Returns a handle for sending new proof tasks to the manager. + /// + /// # Interface Compatibility + /// + /// This method is UNCHANGED from the original implementation. The returned + /// `ProofTaskManagerHandle` has the exact same public API as before PR1. pub fn handle(&self) -> ProofTaskManagerHandle> { ProofTaskManagerHandle::new(self.tx_sender.clone(), self.active_handles.clone()) } @@ -121,22 +457,22 @@ where { /// Inserts the task into the pending tasks queue. pub fn queue_proof_task(&mut self, task: ProofTaskKind) { - self.pending_tasks.push_back(task); + self.pending_on_demand.push_back(task); } /// Gets either the next available transaction, or creates a new one if all are in use and the /// total number of transactions created is less than the max concurrency. 
pub fn get_or_create_tx(&mut self) -> ProviderResult>>> { - if let Some(proof_task_tx) = self.proof_task_txs.pop() { + if let Some(proof_task_tx) = self.on_demand_txs.pop() { return Ok(Some(proof_task_tx)); } // if we can create a new tx within our concurrency limits, create one on-demand - if self.total_transactions < self.max_concurrency { + if self.on_demand_tx_count < self.max_on_demand_txs { let provider_ro = self.view.provider_ro()?; let tx = provider_ro.into_tx(); - self.total_transactions += 1; - return Ok(Some(ProofTaskTx::new(tx, self.task_ctx.clone(), self.total_transactions))); + self.on_demand_tx_count += 1; + return Ok(Some(ProofTaskTx::new(tx, self.task_ctx.clone(), self.on_demand_tx_count))); } Ok(None) @@ -148,11 +484,11 @@ where /// This will return an error if a transaction must be created on-demand and the consistent view /// provider fails. pub fn try_spawn_next(&mut self) -> ProviderResult<()> { - let Some(task) = self.pending_tasks.pop_front() else { return Ok(()) }; + let Some(task) = self.pending_on_demand.pop_front() else { return Ok(()) }; let Some(proof_task_tx) = self.get_or_create_tx()? else { // if there are no txs available, requeue the proof task - self.pending_tasks.push_front(task); + self.pending_on_demand.push_front(task); return Ok(()) }; @@ -173,42 +509,121 @@ where } /// Loops, managing the proof tasks, and sending new tasks to the executor. + /// + /// # Task Routing + /// + /// - **Storage Proofs**: Routed to pre-spawned worker pool via bounded channel + /// - If channel is full, falls back to on-demand spawn + /// - **Blinded Nodes**: Queued for on-demand execution (original behavior) + /// + /// # Worker Pool Lifecycle + /// + /// On termination, `storage_work_tx` is dropped, closing the channel and signaling + /// all workers to shut down gracefully. 
pub fn run(mut self) -> ProviderResult<()> { loop { match self.proof_task_rx.recv() { - Ok(message) => match message { - ProofTaskMessage::QueueTask(task) => { - // Track metrics for blinded node requests - #[cfg(feature = "metrics")] - match &task { - ProofTaskKind::BlindedAccountNode(_, _) => { - self.metrics.account_nodes += 1; + Ok(message) => { + match message { + ProofTaskMessage::QueueTask(task) => { + match task { + // ==================== STORAGE PROOF ROUTING ==================== + ProofTaskKind::StorageProof(input, sender) => { + #[cfg(feature = "metrics")] + { + self.metrics.storage_proofs += 1; + } + + // Try to send to worker pool first + match self + .storage_work_tx + .try_send(StorageProofJob { input, result_sender: sender }) + { + Ok(_) => { + // Successfully queued to worker pool + tracing::trace!( + target: "trie::proof_task", + "Storage proof dispatched to worker pool" + ); + } + Err(crossbeam_channel::TrySendError::Full(job)) => { + // Channel full - fall back to on-demand spawn + tracing::debug!( + target: "trie::proof_task", + "Worker pool queue full, spawning on-demand" + ); + + #[cfg(feature = "metrics")] + { + self.metrics.on_demand_fallback += 1; + } + + // Queue for on-demand execution + self.pending_on_demand.push_back( + ProofTaskKind::StorageProof( + job.input, + job.result_sender, + ), + ); + } + Err(crossbeam_channel::TrySendError::Disconnected(_)) => { + // Workers shut down - this should not happen + tracing::error!( + target: "trie::proof_task", + "Worker pool disconnected unexpectedly" + ); + return Err(reth_storage_errors::provider::ProviderError::Database( + reth_db_api::DatabaseError::Other("Worker pool disconnected".into()) + )) + } + } + } + + // ==================== BLINDED NODE ROUTING ==================== + ProofTaskKind::BlindedAccountNode(_, _) => { + #[cfg(feature = "metrics")] + { + self.metrics.account_nodes += 1; + } + self.queue_proof_task(task); + } + ProofTaskKind::BlindedStorageNode(_, _, _) => { + 
#[cfg(feature = "metrics")] + { + self.metrics.storage_nodes += 1; + } + self.queue_proof_task(task); + } } - ProofTaskKind::BlindedStorageNode(_, _, _) => { - self.metrics.storage_nodes += 1; - } - _ => {} } - // queue the task - self.queue_proof_task(task) - } - ProofTaskMessage::Transaction(tx) => { - // return the transaction to the pool - self.proof_task_txs.push(tx); - } - ProofTaskMessage::Terminate => { - // Record metrics before terminating - #[cfg(feature = "metrics")] - self.metrics.record(); - return Ok(()) + ProofTaskMessage::Transaction(tx) => { + // Return transaction to on-demand pool + self.on_demand_txs.push(tx); + } + ProofTaskMessage::Terminate => { + // Drop storage_work_tx to signal workers to shut down + drop(self.storage_work_tx); + + tracing::info!( + target: "trie::proof_task", + storage_worker_count = self.storage_worker_count, + "Shutting down proof task manager, signaling workers to terminate" + ); + + // Record metrics before terminating + #[cfg(feature = "metrics")] + self.metrics.record(); + + return Ok(()) + } } - }, + } // All senders are disconnected, so we can terminate // However this should never happen, as this struct stores a sender Err(_) => return Ok(()), }; - // try spawning the next task + // Try spawning on-demand tasks only (storage proofs handled by worker pool) self.try_spawn_next()?; } } @@ -260,7 +675,103 @@ where (trie_cursor_factory, hashed_cursor_factory) } + /// Compute storage proof without consuming self (for worker pool reuse). + /// + /// # Purpose + /// + /// This method enables transaction reuse in the storage worker pool. Unlike the + /// original `storage_proof(self, ...)` which consumes self and returns the + /// transaction to a pool, this method: + /// + /// 1. Borrows self immutably + /// 2. Computes the proof using the owned transaction + /// 3. Returns only the result (transaction remains owned) + /// 4. 
Can be called repeatedly on the same ProofTaskTx instance + /// + /// # Usage + /// + /// This is called exclusively by storage workers in the worker pool. On-demand + /// execution still uses the original `storage_proof(self, ...)` method which + /// consumes self and returns the transaction. + /// + /// # Performance + /// + /// By reusing the same transaction and cursor factories across multiple proofs: + /// - Eliminates per-proof transaction creation overhead + /// - Avoids message passing to return transactions + /// - Reduces memory allocations for cursor factories + fn compute_storage_proof(&self, input: &StorageProofInput) -> StorageProofResult { + // ==================== SETUP ==================== + + // Create cursor factories (same as original implementation) + let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); + + // Get or create added/removed keys context + let multi_added_removed_keys = input + .multi_added_removed_keys + .clone() + .unwrap_or_else(|| Arc::new(MultiAddedRemovedKeys::new())); + let added_removed_keys = multi_added_removed_keys.get_storage(&input.hashed_address); + + let span = tracing::trace_span!( + target: "trie::proof_task", + "Storage proof calculation", + hashed_address = ?input.hashed_address, + // Worker ID embedded in ProofTaskTx for trace correlation + worker_id = self.id, + ); + let _guard = span.enter(); + + let target_slots_len = input.target_slots.len(); + let proof_start = Instant::now(); + + // Compute raw storage multiproof (identical to original) + let raw_proof_result = StorageProof::new_hashed( + trie_cursor_factory, + hashed_cursor_factory, + input.hashed_address, + ) + .with_prefix_set_mut(PrefixSetMut::from(input.prefix_set.iter().copied())) + .with_branch_node_masks(input.with_branch_node_masks) + .with_added_removed_keys(added_removed_keys) + .storage_multiproof(input.target_slots.clone()) + .map_err(|e| ParallelStateRootError::Other(e.to_string())); + + // Decode proof into 
DecodedStorageMultiProof + let decoded_result = raw_proof_result.and_then(|raw_proof| { + raw_proof.try_into().map_err(|e: alloy_rlp::Error| { + ParallelStateRootError::Other(format!( + "Failed to decode storage proof for {}: {}", + input.hashed_address, e + )) + }) + }); + + trace!( + target: "trie::proof_task", + hashed_address = ?input.hashed_address, + prefix_set_len = input.prefix_set.len(), + target_slots = target_slots_len, + proof_time_us = proof_start.elapsed().as_micros(), + worker_id = self.id, + "Completed storage proof calculation" + ); + + decoded_result + + // NOTE: self is NOT consumed - transaction remains owned by worker + // No ProofTaskMessage::Transaction sent + } + /// Calculates a storage proof for the given hashed address, and desired prefix set. + /// + /// **ON-DEMAND VARIANT** - Consumes self, returns transaction to pool. + /// + /// This method is NO LONGER CALLED for storage proofs from the worker pool, + /// but is kept for: + /// 1. Backward compatibility with any direct callers + /// 2. Future use cases that need one-off storage proofs + /// 3. Tests that rely on the transaction return mechanism fn storage_proof( self, input: StorageProofInput, diff --git a/crates/trie/parallel/src/proof_task_metrics.rs b/crates/trie/parallel/src/proof_task_metrics.rs index cdb59d078d8..97ec8e6f113 100644 --- a/crates/trie/parallel/src/proof_task_metrics.rs +++ b/crates/trie/parallel/src/proof_task_metrics.rs @@ -9,6 +9,10 @@ pub struct ProofTaskMetrics { pub account_nodes: usize, /// Count of blinded storage node requests. pub storage_nodes: usize, + /// Count of storage proof requests routed to worker pool. + pub storage_proofs: usize, + /// Count of times worker pool was full and fell back to on-demand execution. 
+ pub on_demand_fallback: usize, } impl ProofTaskMetrics { @@ -16,6 +20,7 @@ impl ProofTaskMetrics { pub fn record(&self) { self.task_metrics.record_account_nodes(self.account_nodes); self.task_metrics.record_storage_nodes(self.storage_nodes); + self.task_metrics.record_storage_proofs(self.storage_proofs); } } @@ -27,6 +32,8 @@ pub struct ProofTaskTrieMetrics { blinded_account_nodes: Histogram, /// A histogram for the number of blinded storage nodes fetched. blinded_storage_nodes: Histogram, + /// A histogram for the number of storage proofs computed via worker pool. + storage_proofs: Histogram, } impl ProofTaskTrieMetrics { @@ -39,4 +46,9 @@ impl ProofTaskTrieMetrics { pub fn record_storage_nodes(&self, count: usize) { self.blinded_storage_nodes.record(count as f64); } + + /// Record storage proofs computed via worker pool. + pub fn record_storage_proofs(&self, count: usize) { + self.storage_proofs.record(count as f64); + } } From fbeec5063d0a031e1da1c8cedbdae27ec58cceab Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 06:42:13 +0000 Subject: [PATCH 02/52] fmt, clippy --- crates/engine/primitives/src/config.rs | 8 ++++---- crates/trie/parallel/src/proof_task.rs | 25 +++++++++++++------------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index 34cffbec2b8..491583699b7 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -26,7 +26,7 @@ const MAX_STORAGE_PROOF_WORKERS: usize = 12; /// Minimum number of storage proof workers const MIN_STORAGE_PROOF_WORKERS: usize = 2; -/// Default ratio of storage proof workers to max_proof_task_concurrency +/// Default ratio of storage proof workers to `max_proof_task_concurrency` const DEFAULT_STORAGE_PROOF_WORKER_RATIO: f32 = 0.5; const DEFAULT_BLOCK_BUFFER_LIMIT: u32 = 256; @@ -119,7 +119,7 @@ pub struct TreeConfig { /// Whether to unwind canonical header to ancestor during 
forkchoice updates. allow_unwind_canonical_header: bool, /// Number of dedicated storage proof workers. - /// If None, defaults to half of max_proof_task_concurrency. + /// If None, defaults to half of `max_proof_task_concurrency`. storage_proof_workers: Option, } @@ -470,7 +470,7 @@ impl TreeConfig { /// Get the number of storage proof workers. /// - /// Defaults to half of max_proof_task_concurrency, clamped to valid range. + /// Defaults to half of `max_proof_task_concurrency`, clamped to valid range. pub fn storage_proof_workers(&self) -> usize { self.storage_proof_workers.unwrap_or_else(|| { let derived = (self.max_proof_task_concurrency as f32 * @@ -481,7 +481,7 @@ impl TreeConfig { /// Set the number of storage proof workers explicitly. /// - /// Value is clamped to [MIN_STORAGE_PROOF_WORKERS, MAX_STORAGE_PROOF_WORKERS]. + /// Value is clamped to [`MIN_STORAGE_PROOF_WORKERS`, `MAX_STORAGE_PROOF_WORKERS`]. pub const fn with_storage_proof_workers(mut self, workers: usize) -> Self { // Note: Can't use clamp in const fn, so we'll do manual clamping let clamped = if workers < MIN_STORAGE_PROOF_WORKERS { diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 4f4f0e746df..82584916ceb 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -52,15 +52,15 @@ type TrieNodeProviderResult = Result, SparseTrieError>; /// Internal message for storage proof workers. /// /// This is NOT exposed publicly. External callers still use `ProofTaskKind::StorageProof` -/// which is routed through the manager's std::mpsc channel. +/// which is routed through the manager's `std::mpsc` channel. #[derive(Debug)] struct StorageProofJob { /// Storage proof input parameters input: StorageProofInput, /// Channel to send result back to original caller /// - /// This is the same std::mpsc::Sender that the external caller provided in - /// ProofTaskKind::StorageProof(input, sender). 
+ /// This is the same `std::mpsc::Sender` that the external caller provided in + /// `ProofTaskKind::StorageProof(input`, sender). result_sender: Sender, } @@ -69,6 +69,7 @@ struct StorageProofJob { /// These tasks are executed with lazily-created transactions that are /// returned to the pool after use (same as current behavior). #[derive(Debug)] +#[allow(dead_code)] enum OnDemandTask { /// Fetch a blinded account node by path BlindedAccountNode(Nibbles, Sender), @@ -96,7 +97,7 @@ enum OnDemandTask { /// /// The external API via `ProofTaskManagerHandle` is COMPLETELY UNCHANGED: /// - `queue_task(ProofTaskKind)` signature identical -/// - Same std::mpsc message passing +/// - Same `std::mpsc` message passing /// - Same return types and error handling /// /// All changes are internal routing optimizations. @@ -105,7 +106,7 @@ pub struct ProofTaskManager { // ==================== STORAGE WORKER POOL (NEW) ==================== /// Sender for storage proof tasks to worker pool. /// - /// Queue capacity = storage_worker_count * 2 (for 2x buffering) + /// Queue capacity = `storage_worker_count` * 2 (for 2x buffering) storage_work_tx: CrossbeamSender, /// Number of storage workers successfully spawned. @@ -116,7 +117,7 @@ pub struct ProofTaskManager { // ==================== ON-DEMAND TRANSACTION POOL (REFACTORED) ==================== /// Maximum number of on-demand transactions for blinded node fetches. /// - /// Calculated as: max_concurrency - storage_worker_count + /// Calculated as: `max_concurrency` - `storage_worker_count` max_on_demand_txs: usize, /// Currently available on-demand transactions (reused after return). @@ -129,8 +130,8 @@ pub struct ProofTaskManager { /// Queue of pending on-demand tasks waiting for available transaction. /// - /// Replaces the old `pending_tasks` VecDeque which held all task types. 
- /// TODO: Change to VecDeque in Phase 8 when implementing proper task routing + /// Replaces the old `pending_tasks` `VecDeque` which held all task types. + /// TODO: Change to `VecDeque` in Phase 8 when implementing proper task routing pending_on_demand: VecDeque, // ==================== SHARED RESOURCES ==================== @@ -145,7 +146,7 @@ pub struct ProofTaskManager { /// A receiver for new proof task messages from external callers. /// - /// This is the std::mpsc channel connected to ProofTaskManagerHandle. + /// This is the `std::mpsc` channel connected to [`ProofTaskManagerHandle`]. /// UNCHANGED - maintains interface compatibility. proof_task_rx: Receiver>>, @@ -172,7 +173,7 @@ pub struct ProofTaskManager { /// Each worker: /// 1. Receives `StorageProofJob` from crossbeam bounded channel /// 2. Computes proof using its dedicated long-lived transaction -/// 3. Sends result directly to original caller via std::mpsc +/// 3. Sends result directly to original caller via `std::mpsc` /// 4. Repeats until channel closes (graceful shutdown) /// /// # Transaction Reuse @@ -238,7 +239,7 @@ fn storage_worker_loop( // ==================== RESULT DELIVERY ==================== // Send result directly to original caller's std::mpsc::Receiver // If receiver is dropped (caller cancelled), log and continue - if let Err(_) = result_sender.send(result) { + if result_sender.send(result).is_err() { tracing::debug!( target: "trie::proof_task", worker_id, @@ -686,7 +687,7 @@ where /// 1. Borrows self immutably /// 2. Computes the proof using the owned transaction /// 3. Returns only the result (transaction remains owned) - /// 4. Can be called repeatedly on the same ProofTaskTx instance + /// 4. 
Can be called repeatedly on the same [`ProofTaskTx`] instance /// /// # Usage /// From ab823866fbff3ea82ed0e76c4998c7739ddee9da Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:00:34 +0000 Subject: [PATCH 03/52] add fallback - Enhanced documentation for `StorageProofJob` to clarify its current unused status and potential for future type-safe design. - Updated comments in `ProofTaskManager` regarding the handling of on-demand tasks and the possibility of refactoring to a more type-safe enum. - Improved logging for worker pool disconnection scenarios, emphasizing fallback to on-demand execution. --- crates/trie/parallel/src/proof_task.rs | 37 +++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 82584916ceb..1d8019171ab 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -66,7 +66,12 @@ struct StorageProofJob { /// Internal message for on-demand task execution. /// -/// These tasks are executed with lazily-created transactions that are +/// **Note**: Currently unused in favor of [`ProofTaskKind`] for simplicity and backwards +/// compatibility. This enum represents a more type-safe design where on-demand tasks +/// (blinded nodes) are strictly separated from worker pool tasks (storage proofs). +/// Available for future refactoring if stricter type safety is desired. +/// +/// These tasks would be executed with lazily-created transactions that are /// returned to the pool after use (same as current behavior). #[derive(Debug)] #[allow(dead_code)] @@ -131,7 +136,9 @@ pub struct ProofTaskManager { /// Queue of pending on-demand tasks waiting for available transaction. /// /// Replaces the old `pending_tasks` `VecDeque` which held all task types. 
- /// TODO: Change to `VecDeque` in Phase 8 when implementing proper task routing + /// Currently holds `ProofTaskKind` for both blinded node fetches and storage proof + /// fallbacks (when worker pool is full/unavailable). Could be refactored to use + /// the more type-safe `OnDemandTask` enum if strict separation is desired. pending_on_demand: VecDeque, // ==================== SHARED RESOURCES ==================== @@ -567,15 +574,27 @@ where ), ); } - Err(crossbeam_channel::TrySendError::Disconnected(_)) => { - // Workers shut down - this should not happen - tracing::error!( + Err(crossbeam_channel::TrySendError::Disconnected(job)) => { + // No workers available (likely all spawns failed) - + // fall back to on-demand + tracing::warn!( target: "trie::proof_task", - "Worker pool disconnected unexpectedly" + storage_worker_count = self.storage_worker_count, + "Worker pool disconnected (no workers available), falling back to on-demand" + ); + + #[cfg(feature = "metrics")] + { + self.metrics.on_demand_fallback += 1; + } + + // Queue for on-demand execution instead of failing + self.pending_on_demand.push_back( + ProofTaskKind::StorageProof( + job.input, + job.result_sender, + ), ); - return Err(reth_storage_errors::provider::ProviderError::Database( - reth_db_api::DatabaseError::Other("Worker pool disconnected".into()) - )) } } } From 13891ad1ead85ffc44c1e5ad52b35a475f48cdf9 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:09:51 +0000 Subject: [PATCH 04/52] fix comments --- crates/engine/primitives/src/config.rs | 25 +- crates/trie/parallel/src/proof_task.rs | 385 ++++++++++--------------- 2 files changed, 173 insertions(+), 237 deletions(-) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index 491583699b7..10d93676f5e 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -23,9 +23,6 @@ pub const DEFAULT_PREWARM_MAX_CONCURRENCY: usize = 16; /// Maximum number 
of storage proof workers const MAX_STORAGE_PROOF_WORKERS: usize = 12; -/// Minimum number of storage proof workers -const MIN_STORAGE_PROOF_WORKERS: usize = 2; - /// Default ratio of storage proof workers to `max_proof_task_concurrency` const DEFAULT_STORAGE_PROOF_WORKER_RATIO: f32 = 0.5; @@ -470,24 +467,32 @@ impl TreeConfig { /// Get the number of storage proof workers. /// - /// Defaults to half of `max_proof_task_concurrency`, clamped to valid range. + /// Defaults to half of `max_proof_task_concurrency`, clamped to valid range and leaving at + /// least one slot for on-demand work. pub fn storage_proof_workers(&self) -> usize { + let max_allowed = self.max_proof_task_concurrency.saturating_sub(1) as usize; + if max_allowed == 0 { + return 0; + } + self.storage_proof_workers.unwrap_or_else(|| { let derived = (self.max_proof_task_concurrency as f32 * DEFAULT_STORAGE_PROOF_WORKER_RATIO) as usize; - derived.clamp(MIN_STORAGE_PROOF_WORKERS, MAX_STORAGE_PROOF_WORKERS) + let capped = derived.min(MAX_STORAGE_PROOF_WORKERS); + + capped.clamp(1, max_allowed) }) } /// Set the number of storage proof workers explicitly. /// - /// Value is clamped to [`MIN_STORAGE_PROOF_WORKERS`, `MAX_STORAGE_PROOF_WORKERS`]. + /// Value is clamped to the remaining concurrency budget (leaving one on-demand slot). 
pub const fn with_storage_proof_workers(mut self, workers: usize) -> Self { - // Note: Can't use clamp in const fn, so we'll do manual clamping - let clamped = if workers < MIN_STORAGE_PROOF_WORKERS { - MIN_STORAGE_PROOF_WORKERS - } else if workers > MAX_STORAGE_PROOF_WORKERS { + let max_allowed = self.max_proof_task_concurrency.saturating_sub(1) as usize; + let clamped = if workers > MAX_STORAGE_PROOF_WORKERS { MAX_STORAGE_PROOF_WORKERS + } else if workers > max_allowed { + max_allowed } else { workers }; diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 1d8019171ab..fcab2787ff1 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -84,31 +84,27 @@ enum OnDemandTask { /// A task that manages sending proof requests to worker pools and on-demand tasks. /// -/// # Architecture (PR1: Storage Workers Only) +/// # Architecture /// /// This manager maintains two execution paths: /// -/// 1. **Storage Worker Pool** (NEW): +/// 1. **Storage Worker Pool**: /// - Pre-spawned workers with dedicated long-lived transactions /// - Tasks queued via crossbeam bounded channel /// - Workers continuously process without transaction return overhead /// -/// 2. **On-Demand Execution** (EXISTING): +/// 2. **On-Demand Execution**: /// - Lazy transaction creation for blinded node fetches -/// - Transactions returned to pool after use (original behavior) -/// - Same message-passing mechanism as before +/// - Transactions returned to pool after use /// /// # External API /// -/// The external API via `ProofTaskManagerHandle` is COMPLETELY UNCHANGED: -/// - `queue_task(ProofTaskKind)` signature identical -/// - Same `std::mpsc` message passing -/// - Same return types and error handling -/// -/// All changes are internal routing optimizations. 
+/// The external API via `ProofTaskManagerHandle`: +/// - `queue_task(ProofTaskKind)` for submitting tasks +/// - `std::mpsc` message passing +/// - Consistent return types and error handling #[derive(Debug)] pub struct ProofTaskManager { - // ==================== STORAGE WORKER POOL (NEW) ==================== /// Sender for storage proof tasks to worker pool. /// /// Queue capacity = `storage_worker_count` * 2 (for 2x buffering) @@ -119,15 +115,12 @@ pub struct ProofTaskManager { /// May be less than requested if transaction creation fails. storage_worker_count: usize, - // ==================== ON-DEMAND TRANSACTION POOL (REFACTORED) ==================== /// Maximum number of on-demand transactions for blinded node fetches. /// /// Calculated as: `max_concurrency` - `storage_worker_count` max_on_demand_txs: usize, /// Currently available on-demand transactions (reused after return). - /// - /// Same lifecycle as before PR1. on_demand_txs: Vec>>, /// Total on-demand transactions created (for ID assignment). @@ -135,13 +128,10 @@ pub struct ProofTaskManager { /// Queue of pending on-demand tasks waiting for available transaction. /// - /// Replaces the old `pending_tasks` `VecDeque` which held all task types. - /// Currently holds `ProofTaskKind` for both blinded node fetches and storage proof - /// fallbacks (when worker pool is full/unavailable). Could be refactored to use - /// the more type-safe `OnDemandTask` enum if strict separation is desired. + /// Holds `ProofTaskKind` for both blinded node fetches and storage proof + /// fallbacks (when worker pool is full/unavailable). pending_on_demand: VecDeque, - // ==================== SHARED RESOURCES ==================== /// Consistent view provider used for creating transactions on-demand. view: ConsistentDbView, @@ -154,7 +144,6 @@ pub struct ProofTaskManager { /// A receiver for new proof task messages from external callers. /// /// This is the `std::mpsc` channel connected to [`ProofTaskManagerHandle`]. 
- /// UNCHANGED - maintains interface compatibility. proof_task_rx: Receiver>>, /// A sender for internal messaging (transaction returns). @@ -185,24 +174,17 @@ pub struct ProofTaskManager { /// /// # Transaction Reuse /// -/// The key optimization: the worker reuses the same `proof_tx` across ALL proofs, -/// avoiding the overhead of: -/// - Creating new database transactions -/// - Setting up cursor factories -/// - Returning transactions to a pool +/// Reuses the same transaction across multiple proofs to avoid transaction +/// creation and cursor factory setup overhead. /// /// # Panic Safety /// -/// If this function panics, the worker thread terminates but: -/// - Other workers continue operating -/// - The manager detects disconnection when trying to send -/// - System degrades gracefully rather than failing completely +/// If this function panics, the worker thread terminates but other workers +/// continue operating and the system degrades gracefully. /// /// # Shutdown /// -/// Worker shuts down when: -/// - Crossbeam channel closes (all senders dropped) -/// - `ProofTaskManager::run()` drops `storage_work_tx` on terminate +/// Worker shuts down when the crossbeam channel closes (all senders dropped). 
fn storage_worker_loop( proof_tx: ProofTaskTx, work_rx: CrossbeamReceiver, @@ -219,38 +201,31 @@ fn storage_worker_loop( let mut proofs_processed = 0u64; let start_time = Instant::now(); - // Main worker loop: process jobs until channel closes while let Ok(StorageProofJob { input, result_sender }) = work_rx.recv() { - let proof_start = Instant::now(); + let hashed_address = input.hashed_address; + let prefix_set_len = input.prefix_set.len(); + let target_slots_len = input.target_slots.len(); trace!( target: "trie::proof_task", worker_id, - hashed_address = ?input.hashed_address, - prefix_set_len = input.prefix_set.len(), - target_slots = input.target_slots.len(), + hashed_address = ?hashed_address, + prefix_set_len, + target_slots = target_slots_len, "Processing storage proof" ); - // ==================== CORE COMPUTATION ==================== - // Compute storage proof using reused transaction - // This is the key difference from on-demand execution: - // - No transaction creation overhead - // - No transaction return message - // - Cursor factories reused across proofs - let result = proof_tx.compute_storage_proof(&input); + let proof_start = Instant::now(); + let result = proof_tx.compute_storage_proof(input); let proof_elapsed = proof_start.elapsed(); proofs_processed += 1; - // ==================== RESULT DELIVERY ==================== - // Send result directly to original caller's std::mpsc::Receiver - // If receiver is dropped (caller cancelled), log and continue if result_sender.send(result).is_err() { tracing::debug!( target: "trie::proof_task", worker_id, - hashed_address = ?input.hashed_address, + hashed_address = ?hashed_address, proofs_processed, "Storage proof receiver dropped, discarding result" ); @@ -259,8 +234,10 @@ fn storage_worker_loop( trace!( target: "trie::proof_task", worker_id, - hashed_address = ?input.hashed_address, + hashed_address = ?hashed_address, proof_time_us = proof_elapsed.as_micros(), + prefix_set_len, + target_slots = 
target_slots_len, total_processed = proofs_processed, "Storage proof completed" ); @@ -299,25 +276,20 @@ where /// /// # Transaction Budget Allocation /// - /// The total `max_concurrency` is split between two pools: + /// The total `max_concurrency` is split between storage workers (pre-allocated) + /// and the on-demand pool (lazy). We always reserve at least one slot for the + /// on-demand path, so the number of workers actually spawned is capped at + /// `max_concurrency - 1`. /// - /// 1. **Storage Workers**: `storage_worker_count` transactions (pre-allocated) - /// 2. **On-Demand Pool**: `max_concurrency - storage_worker_count` (lazy) - /// - /// Example: - /// ```text - /// max_concurrency = 8, storage_worker_count = 4 - /// → 4 storage workers (pre-spawned) - /// → 4 on-demand transactions (created lazily for blinded nodes) - /// Total: 8 transactions max (same capacity as before) - /// ``` + /// For example, if `max_concurrency = 8` and `storage_worker_count = 8`, then + /// 8 workers are requested but only 7 can be accommodated while leaving one + /// on-demand slot, so 7 workers are spawned and the remaining slot is reserved + /// for on-demand transactions (e.g. blinded nodes). /// /// # Worker Spawn Resilience /// - /// If some workers fail to spawn (e.g., transaction creation error): - /// - Failed workers are logged and skipped - /// - On-demand pool is adjusted: `max_concurrency - actual_spawned_workers` - /// - System continues with fewer workers rather than failing entirely + /// If some workers fail to spawn, the on-demand pool is adjusted accordingly + /// and the system continues with fewer workers. 
/// /// # Panics /// @@ -329,35 +301,40 @@ where max_concurrency: usize, storage_worker_count: usize, ) -> Self { - // Create message channel for external callers (UNCHANGED) let (tx_sender, proof_task_rx) = channel(); - // ==================== STORAGE WORKER POOL SETUP ==================== + let worker_budget = max_concurrency.saturating_sub(1); + let planned_workers = storage_worker_count.min(worker_budget); + + if planned_workers < storage_worker_count { + tracing::debug!( + target: "trie::proof_task", + requested = storage_worker_count, + capped = planned_workers, + max_concurrency, + "Adjusted storage worker count to fit concurrency budget" + ); + } - // Queue capacity: 2x buffering to reduce contention - // If workers = 4, queue holds 8 tasks maximum - let queue_capacity = storage_worker_count.saturating_mul(2).max(1); + let queue_capacity = planned_workers.saturating_mul(2).max(1); let (storage_work_tx, storage_work_rx) = bounded::(queue_capacity); tracing::info!( target: "trie::proof_task", - storage_worker_count, + storage_worker_count = planned_workers, queue_capacity, max_concurrency, "Initializing storage proof worker pool" ); - // Spawn storage workers - each gets its own long-lived transaction let mut spawned_workers = 0; - for worker_id in 0..storage_worker_count { - // Try to create transaction for this worker + for worker_id in 0..planned_workers { match view.provider_ro() { Ok(provider_ro) => { let tx = provider_ro.into_tx(); let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); let work_rx = storage_work_rx.clone(); - // Spawn worker on tokio blocking pool executor.spawn_blocking(move || { storage_worker_loop(proof_task_tx, work_rx, worker_id) }); @@ -372,12 +349,11 @@ where ); } Err(err) => { - // Non-fatal: log and continue with fewer workers tracing::warn!( target: "trie::proof_task", worker_id, ?err, - requested = storage_worker_count, + requested = planned_workers, spawned_workers, "Failed to create transaction for storage 
worker, continuing with fewer workers" ); @@ -385,17 +361,16 @@ where } } - // Verify we spawned at least some workers if spawned_workers == 0 { tracing::error!( target: "trie::proof_task", - requested = storage_worker_count, - "Failed to spawn any storage workers - all will use on-demand pool" + requested = planned_workers, + "Failed to spawn any storage workers - all work will execute on-demand" ); - } else if spawned_workers < storage_worker_count { + } else if spawned_workers < planned_workers { tracing::warn!( target: "trie::proof_task", - requested = storage_worker_count, + requested = planned_workers, spawned = spawned_workers, "Spawned fewer storage workers than requested" ); @@ -408,11 +383,8 @@ where ); } - // ==================== ON-DEMAND POOL SETUP ==================== - - // Calculate on-demand budget: remaining capacity after storage workers - // Ensure at least 1 on-demand transaction even if storage workers consume all budget - let max_on_demand_txs = max_concurrency.saturating_sub(spawned_workers).max(1); + // Allocate remaining capacity to on-demand pool. + let max_on_demand_txs = max_concurrency.saturating_sub(spawned_workers); tracing::debug!( target: "trie::proof_task", @@ -422,20 +394,13 @@ where "Configured on-demand transaction pool for blinded nodes" ); - // ==================== CONSTRUCT MANAGER ==================== - Self { - // Storage worker pool storage_work_tx, storage_worker_count: spawned_workers, - - // On-demand pool max_on_demand_txs, on_demand_txs: Vec::with_capacity(max_on_demand_txs), on_demand_tx_count: 0, pending_on_demand: VecDeque::new(), - - // Shared resources view, task_ctx, executor, @@ -449,11 +414,6 @@ where } /// Returns a handle for sending new proof tasks to the manager. - /// - /// # Interface Compatibility - /// - /// This method is UNCHANGED from the original implementation. The returned - /// `ProofTaskManagerHandle` has the exact same public API as before PR1. 
pub fn handle(&self) -> ProofTaskManagerHandle> { ProofTaskManagerHandle::new(self.tx_sender.clone(), self.active_handles.clone()) } @@ -520,102 +480,91 @@ where /// /// # Task Routing /// - /// - **Storage Proofs**: Routed to pre-spawned worker pool via bounded channel - /// - If channel is full, falls back to on-demand spawn - /// - **Blinded Nodes**: Queued for on-demand execution (original behavior) + /// - **Storage Proofs**: Routed to pre-spawned worker pool via bounded channel. Falls back to + /// on-demand spawn if channel is full or disconnected. + /// - **Blinded Nodes**: Queued for on-demand execution. /// - /// # Worker Pool Lifecycle + /// # Shutdown /// - /// On termination, `storage_work_tx` is dropped, closing the channel and signaling - /// all workers to shut down gracefully. + /// On termination, `storage_work_tx` is dropped, closing the channel and + /// signaling all workers to shut down gracefully. pub fn run(mut self) -> ProviderResult<()> { loop { match self.proof_task_rx.recv() { Ok(message) => { match message { - ProofTaskMessage::QueueTask(task) => { - match task { - // ==================== STORAGE PROOF ROUTING ==================== - ProofTaskKind::StorageProof(input, sender) => { - #[cfg(feature = "metrics")] - { - self.metrics.storage_proofs += 1; - } + ProofTaskMessage::QueueTask(task) => match task { + ProofTaskKind::StorageProof(input, sender) => { + #[cfg(feature = "metrics")] + { + self.metrics.storage_proofs += 1; + } - // Try to send to worker pool first - match self - .storage_work_tx - .try_send(StorageProofJob { input, result_sender: sender }) - { - Ok(_) => { - // Successfully queued to worker pool - tracing::trace!( - target: "trie::proof_task", - "Storage proof dispatched to worker pool" - ); - } - Err(crossbeam_channel::TrySendError::Full(job)) => { - // Channel full - fall back to on-demand spawn - tracing::debug!( - target: "trie::proof_task", - "Worker pool queue full, spawning on-demand" - ); - - #[cfg(feature = 
"metrics")] - { - self.metrics.on_demand_fallback += 1; - } - - // Queue for on-demand execution - self.pending_on_demand.push_back( - ProofTaskKind::StorageProof( - job.input, - job.result_sender, - ), - ); + match self + .storage_work_tx + .try_send(StorageProofJob { input, result_sender: sender }) + { + Ok(_) => { + tracing::trace!( + target: "trie::proof_task", + "Storage proof dispatched to worker pool" + ); + } + Err(crossbeam_channel::TrySendError::Full(job)) => { + tracing::debug!( + target: "trie::proof_task", + "Worker pool queue full, spawning on-demand" + ); + + #[cfg(feature = "metrics")] + { + self.metrics.on_demand_fallback += 1; } - Err(crossbeam_channel::TrySendError::Disconnected(job)) => { - // No workers available (likely all spawns failed) - - // fall back to on-demand - tracing::warn!( - target: "trie::proof_task", - storage_worker_count = self.storage_worker_count, - "Worker pool disconnected (no workers available), falling back to on-demand" - ); - - #[cfg(feature = "metrics")] - { - self.metrics.on_demand_fallback += 1; - } - - // Queue for on-demand execution instead of failing - self.pending_on_demand.push_back( - ProofTaskKind::StorageProof( - job.input, - job.result_sender, - ), - ); + + self.pending_on_demand.push_back( + ProofTaskKind::StorageProof( + job.input, + job.result_sender, + ), + ); + } + Err(crossbeam_channel::TrySendError::Disconnected(job)) => { + tracing::warn!( + target: "trie::proof_task", + storage_worker_count = self.storage_worker_count, + "Worker pool disconnected (no workers available), falling back to on-demand" + ); + + #[cfg(feature = "metrics")] + { + self.metrics.on_demand_fallback += 1; } + + self.pending_on_demand.push_back( + ProofTaskKind::StorageProof( + job.input, + job.result_sender, + ), + ); } } + } - // ==================== BLINDED NODE ROUTING ==================== - ProofTaskKind::BlindedAccountNode(_, _) => { - #[cfg(feature = "metrics")] - { - self.metrics.account_nodes += 1; - } - 
self.queue_proof_task(task); + ProofTaskKind::BlindedAccountNode(_, _) => { + #[cfg(feature = "metrics")] + { + self.metrics.account_nodes += 1; } - ProofTaskKind::BlindedStorageNode(_, _, _) => { - #[cfg(feature = "metrics")] - { - self.metrics.storage_nodes += 1; - } - self.queue_proof_task(task); + self.queue_proof_task(task); + } + ProofTaskKind::BlindedStorageNode(_, _, _) => { + #[cfg(feature = "metrics")] + { + self.metrics.storage_nodes += 1; } + self.queue_proof_task(task); } - } + }, ProofTaskMessage::Transaction(tx) => { // Return transaction to on-demand pool self.on_demand_txs.push(tx); @@ -695,82 +644,64 @@ where (trie_cursor_factory, hashed_cursor_factory) } - /// Compute storage proof without consuming self (for worker pool reuse). - /// - /// # Purpose - /// - /// This method enables transaction reuse in the storage worker pool. Unlike the - /// original `storage_proof(self, ...)` which consumes self and returns the - /// transaction to a pool, this method: - /// - /// 1. Borrows self immutably - /// 2. Computes the proof using the owned transaction - /// 3. Returns only the result (transaction remains owned) - /// 4. Can be called repeatedly on the same [`ProofTaskTx`] instance - /// - /// # Usage + /// Compute storage proof without consuming self. /// - /// This is called exclusively by storage workers in the worker pool. On-demand - /// execution still uses the original `storage_proof(self, ...)` method which - /// consumes self and returns the transaction. 
- /// - /// # Performance - /// - /// By reusing the same transaction and cursor factories across multiple proofs: - /// - Eliminates per-proof transaction creation overhead - /// - Avoids message passing to return transactions - /// - Reduces memory allocations for cursor factories - fn compute_storage_proof(&self, input: &StorageProofInput) -> StorageProofResult { - // ==================== SETUP ==================== - - // Create cursor factories (same as original implementation) + /// Borrows self immutably to allow transaction reuse across multiple calls. + /// Used by storage workers in the worker pool to avoid transaction creation + /// overhead on each proof computation. + fn compute_storage_proof(&self, input: StorageProofInput) -> StorageProofResult { let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); + // Consume the input so we can move large collections (e.g. target slots) without cloning. + let StorageProofInput { + hashed_address, + prefix_set, + target_slots, + with_branch_node_masks, + multi_added_removed_keys, + } = input; + let prefix_set_len = prefix_set.len(); + let target_slots_len = target_slots.len(); + // Get or create added/removed keys context - let multi_added_removed_keys = input - .multi_added_removed_keys - .clone() - .unwrap_or_else(|| Arc::new(MultiAddedRemovedKeys::new())); - let added_removed_keys = multi_added_removed_keys.get_storage(&input.hashed_address); + let multi_added_removed_keys = + multi_added_removed_keys.unwrap_or_else(|| Arc::new(MultiAddedRemovedKeys::new())); + let added_removed_keys = multi_added_removed_keys.get_storage(&hashed_address); let span = tracing::trace_span!( target: "trie::proof_task", "Storage proof calculation", - hashed_address = ?input.hashed_address, + hashed_address = ?hashed_address, // Worker ID embedded in ProofTaskTx for trace correlation worker_id = self.id, ); let _guard = span.enter(); - let target_slots_len = input.target_slots.len(); let proof_start = 
Instant::now(); // Compute raw storage multiproof (identical to original) - let raw_proof_result = StorageProof::new_hashed( - trie_cursor_factory, - hashed_cursor_factory, - input.hashed_address, - ) - .with_prefix_set_mut(PrefixSetMut::from(input.prefix_set.iter().copied())) - .with_branch_node_masks(input.with_branch_node_masks) - .with_added_removed_keys(added_removed_keys) - .storage_multiproof(input.target_slots.clone()) - .map_err(|e| ParallelStateRootError::Other(e.to_string())); + let raw_proof_result = + StorageProof::new_hashed(trie_cursor_factory, hashed_cursor_factory, hashed_address) + .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) + .with_branch_node_masks(with_branch_node_masks) + .with_added_removed_keys(added_removed_keys) + .storage_multiproof(target_slots) + .map_err(|e| ParallelStateRootError::Other(e.to_string())); // Decode proof into DecodedStorageMultiProof let decoded_result = raw_proof_result.and_then(|raw_proof| { raw_proof.try_into().map_err(|e: alloy_rlp::Error| { ParallelStateRootError::Other(format!( "Failed to decode storage proof for {}: {}", - input.hashed_address, e + hashed_address, e )) }) }); trace!( target: "trie::proof_task", - hashed_address = ?input.hashed_address, - prefix_set_len = input.prefix_set.len(), + hashed_address = ?hashed_address, + prefix_set_len, target_slots = target_slots_len, proof_time_us = proof_start.elapsed().as_micros(), worker_id = self.id, From d4e0adb75ceb64f56e44bdb600966f4b8f05a4d4 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:16:08 +0000 Subject: [PATCH 05/52] refactor(metrics): remove unused storage proof metrics from ProofTaskMetrics and ProofTaskTrieMetrics --- crates/trie/parallel/src/proof_task_metrics.rs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/crates/trie/parallel/src/proof_task_metrics.rs b/crates/trie/parallel/src/proof_task_metrics.rs index 97ec8e6f113..cdb59d078d8 100644 --- 
a/crates/trie/parallel/src/proof_task_metrics.rs +++ b/crates/trie/parallel/src/proof_task_metrics.rs @@ -9,10 +9,6 @@ pub struct ProofTaskMetrics { pub account_nodes: usize, /// Count of blinded storage node requests. pub storage_nodes: usize, - /// Count of storage proof requests routed to worker pool. - pub storage_proofs: usize, - /// Count of times worker pool was full and fell back to on-demand execution. - pub on_demand_fallback: usize, } impl ProofTaskMetrics { @@ -20,7 +16,6 @@ impl ProofTaskMetrics { pub fn record(&self) { self.task_metrics.record_account_nodes(self.account_nodes); self.task_metrics.record_storage_nodes(self.storage_nodes); - self.task_metrics.record_storage_proofs(self.storage_proofs); } } @@ -32,8 +27,6 @@ pub struct ProofTaskTrieMetrics { blinded_account_nodes: Histogram, /// A histogram for the number of blinded storage nodes fetched. blinded_storage_nodes: Histogram, - /// A histogram for the number of storage proofs computed via worker pool. - storage_proofs: Histogram, } impl ProofTaskTrieMetrics { @@ -46,9 +39,4 @@ impl ProofTaskTrieMetrics { pub fn record_storage_nodes(&self, count: usize) { self.blinded_storage_nodes.record(count as f64); } - - /// Record storage proofs computed via worker pool. - pub fn record_storage_proofs(&self, count: usize) { - self.storage_proofs.record(count as f64); - } } From 2957afaef5534b70f0c5f295d600e849be719125 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:16:23 +0000 Subject: [PATCH 06/52] refactor(proof_task): improve documentation and rename variables for clarity - Updated comments in `ProofTaskManager` to enhance clarity regarding on-demand transaction handling and queue management. - Renamed `pending_on_demand` to `on_demand_queue` for better understanding of its purpose. - Adjusted the `new` function documentation to reflect the correct allocation of concurrency budget between storage workers and on-demand transactions. 
- Improved the `queue_proof_task` method to use the new queue name. --- crates/trie/parallel/src/proof_task.rs | 146 ++++++++++++++----------- 1 file changed, 83 insertions(+), 63 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index fcab2787ff1..ba60ae81a4a 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -120,17 +120,17 @@ pub struct ProofTaskManager { /// Calculated as: `max_concurrency` - `storage_worker_count` max_on_demand_txs: usize, - /// Currently available on-demand transactions (reused after return). + /// On-demand transaction pool for blinded node fetches. on_demand_txs: Vec>>, /// Total on-demand transactions created (for ID assignment). on_demand_tx_count: usize, - /// Queue of pending on-demand tasks waiting for available transaction. + /// Queue of tasks waiting for on-demand transaction assignment. /// /// Holds `ProofTaskKind` for both blinded node fetches and storage proof /// fallbacks (when worker pool is full/unavailable). - pending_on_demand: VecDeque, + on_demand_queue: VecDeque, /// Consistent view provider used for creating transactions on-demand. view: ConsistentDbView, @@ -141,14 +141,10 @@ pub struct ProofTaskManager { /// The underlying handle from which to spawn proof tasks. executor: Handle, - /// A receiver for new proof task messages from external callers. - /// - /// This is the `std::mpsc` channel connected to [`ProofTaskManagerHandle`]. + /// Receives proof task requests from [`ProofTaskManagerHandle`]. proof_task_rx: Receiver>>, - /// A sender for internal messaging (transaction returns). - /// - /// Used by on-demand tasks to return transactions to pool. + /// Internal channel for on-demand tasks to return transactions after use. tx_sender: Sender>>, /// The number of active handles. 
@@ -264,36 +260,12 @@ impl ProofTaskManager where Factory: DatabaseProviderFactory, { - /// Creates a new [`ProofTaskManager`] with the given configuration. - /// - /// # Arguments - /// - /// * `executor` - Tokio runtime handle for spawning workers and tasks - /// * `view` - Consistent database view for creating read-only transactions - /// * `task_ctx` - Shared context (trie updates, hashed state, prefix sets) - /// * `max_concurrency` - Total transaction budget across all execution paths - /// * `storage_worker_count` - Number of storage proof workers to pre-spawn - /// - /// # Transaction Budget Allocation - /// - /// The total `max_concurrency` is split between storage workers (pre-allocated) - /// and the on-demand pool (lazy). We always reserve at least one slot for the - /// on-demand path, so the number of workers actually spawned is capped at - /// `max_concurrency - 1`. - /// - /// For example, if `max_concurrency = 8` and `storage_worker_count = 8`, then - /// 8 workers are requested but only 7 can be accommodated while leaving one - /// on-demand slot, so 7 workers are spawned and the remaining slot is reserved - /// for on-demand transactions (e.g. blinded nodes). + /// Creates a new [`ProofTaskManager`] with pre-spawned storage proof workers. /// - /// # Worker Spawn Resilience - /// - /// If some workers fail to spawn, the on-demand pool is adjusted accordingly - /// and the system continues with fewer workers. - /// - /// # Panics - /// - /// Does not panic. All errors are logged and handled gracefully. + /// The `max_concurrency` budget is split between pre-spawned storage workers and an + /// on-demand pool. At least one slot is always reserved for on-demand, so the actual + /// number of workers spawned is `min(storage_worker_count, max_concurrency - 1)`. + /// If workers fail to spawn, the system continues with fewer workers. 
pub fn new( executor: Handle, view: ConsistentDbView, @@ -400,7 +372,7 @@ where max_on_demand_txs, on_demand_txs: Vec::with_capacity(max_on_demand_txs), on_demand_tx_count: 0, - pending_on_demand: VecDeque::new(), + on_demand_queue: VecDeque::new(), view, task_ctx, executor, @@ -413,7 +385,7 @@ where } } - /// Returns a handle for sending new proof tasks to the manager. + /// Returns a handle for sending new proof tasks to the [`ProofTaskManager`]. pub fn handle(&self) -> ProofTaskManagerHandle> { ProofTaskManagerHandle::new(self.tx_sender.clone(), self.active_handles.clone()) } @@ -425,7 +397,7 @@ where { /// Inserts the task into the pending tasks queue. pub fn queue_proof_task(&mut self, task: ProofTaskKind) { - self.pending_on_demand.push_back(task); + self.on_demand_queue.push_back(task); } /// Gets either the next available transaction, or creates a new one if all are in use and the @@ -452,11 +424,11 @@ where /// This will return an error if a transaction must be created on-demand and the consistent view /// provider fails. pub fn try_spawn_next(&mut self) -> ProviderResult<()> { - let Some(task) = self.pending_on_demand.pop_front() else { return Ok(()) }; + let Some(task) = self.on_demand_queue.pop_front() else { return Ok(()) }; let Some(proof_task_tx) = self.get_or_create_tx()? 
else { // if there are no txs available, requeue the proof task - self.pending_on_demand.push_front(task); + self.on_demand_queue.push_front(task); return Ok(()) }; @@ -495,11 +467,6 @@ where match message { ProofTaskMessage::QueueTask(task) => match task { ProofTaskKind::StorageProof(input, sender) => { - #[cfg(feature = "metrics")] - { - self.metrics.storage_proofs += 1; - } - match self .storage_work_tx .try_send(StorageProofJob { input, result_sender: sender }) @@ -516,12 +483,7 @@ where "Worker pool queue full, spawning on-demand" ); - #[cfg(feature = "metrics")] - { - self.metrics.on_demand_fallback += 1; - } - - self.pending_on_demand.push_back( + self.on_demand_queue.push_back( ProofTaskKind::StorageProof( job.input, job.result_sender, @@ -535,12 +497,7 @@ where "Worker pool disconnected (no workers available), falling back to on-demand" ); - #[cfg(feature = "metrics")] - { - self.metrics.on_demand_fallback += 1; - } - - self.pending_on_demand.push_back( + self.on_demand_queue.push_back( ProofTaskKind::StorageProof( job.input, job.result_sender, @@ -709,9 +666,6 @@ where ); decoded_result - - // NOTE: self is NOT consumed - transaction remains owned by worker - // No ProofTaskMessage::Transaction sent } /// Calculates a storage proof for the given hashed address, and desired prefix set. 
@@ -1069,3 +1023,69 @@ impl TrieNodeProvider for ProofTaskTrieNodeProvider { rx.recv().unwrap() } } + +#[cfg(test)] +mod tests { + use super::*; + use alloy_primitives::map::B256Map; + use reth_provider::{providers::ConsistentDbView, test_utils::create_test_provider_factory}; + use reth_trie_common::{ + prefix_set::TriePrefixSetsMut, updates::TrieUpdatesSorted, HashedAccountsSorted, + HashedPostStateSorted, + }; + use std::sync::Arc; + use tokio::{runtime::Builder, task}; + + fn test_ctx() -> ProofTaskCtx { + ProofTaskCtx::new( + Arc::new(TrieUpdatesSorted::default()), + Arc::new(HashedPostStateSorted::new( + HashedAccountsSorted::default(), + B256Map::default(), + )), + Arc::new(TriePrefixSetsMut::default()), + ) + } + + /// Ensures the storage worker pool plus on-demand pool never exceed the requested concurrency + /// when the storage worker count saturates the budget. + #[test] + fn proof_task_manager_respects_concurrency_budget() { + let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); + runtime.block_on(async { + let handle = tokio::runtime::Handle::current(); + let factory = create_test_provider_factory(); + let view = ConsistentDbView::new(factory, None); + let ctx = test_ctx(); + + let manager = ProofTaskManager::new(handle.clone(), view, ctx, 2, 2); + assert_eq!(manager.storage_worker_count, 1); + assert_eq!(manager.max_on_demand_txs, 1); + assert!(manager.storage_worker_count + manager.max_on_demand_txs <= 2); + + drop(manager); + task::yield_now().await; + }); + } + + /// Ensures the manager falls back to on-demand transactions when the budget only allows a + /// single concurrent transaction. 
+ #[test] + fn proof_task_manager_handles_single_concurrency() { + let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); + runtime.block_on(async { + let handle = tokio::runtime::Handle::current(); + let factory = create_test_provider_factory(); + let view = ConsistentDbView::new(factory, None); + let ctx = test_ctx(); + + let manager = ProofTaskManager::new(handle.clone(), view, ctx, 1, 5); + assert_eq!(manager.storage_worker_count, 0); + assert_eq!(manager.max_on_demand_txs, 1); + assert!(manager.storage_worker_count + manager.max_on_demand_txs <= 1); + + drop(manager); + task::yield_now().await; + }); + } +} From 800dcf6d52264e6eb9c8e133edb378d4c0d2fa6c Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:33:04 +0000 Subject: [PATCH 07/52] refactor(proof_task): streamline documentation and clarify task management - Removed the unused `OnDemandTask` enum and updated comments in `ProofTaskManager` to clarify the distinction between storage worker pool and on-demand execution. - Enhanced documentation to better describe the public interface and task submission process. - Improved clarity regarding transaction handling and execution paths for proof requests. --- crates/trie/parallel/src/proof_task.rs | 36 +++++++------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index ba60ae81a4a..b8f117d824e 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -64,45 +64,27 @@ struct StorageProofJob { result_sender: Sender, } -/// Internal message for on-demand task execution. -/// -/// **Note**: Currently unused in favor of [`ProofTaskKind`] for simplicity and backwards -/// compatibility. This enum represents a more type-safe design where on-demand tasks -/// (blinded nodes) are strictly separated from worker pool tasks (storage proofs). 
-/// Available for future refactoring if stricter type safety is desired. -/// -/// These tasks would be executed with lazily-created transactions that are -/// returned to the pool after use (same as current behavior). -#[derive(Debug)] -#[allow(dead_code)] -enum OnDemandTask { - /// Fetch a blinded account node by path - BlindedAccountNode(Nibbles, Sender), - /// Fetch a blinded storage node by account and path - BlindedStorageNode(B256, Nibbles, Sender), -} - -/// A task that manages sending proof requests to worker pools and on-demand tasks. +/// Manager for coordinating proof request execution across different task types. /// /// # Architecture /// -/// This manager maintains two execution paths: +/// This manager handles two distinct execution paths: /// /// 1. **Storage Worker Pool**: /// - Pre-spawned workers with dedicated long-lived transactions /// - Tasks queued via crossbeam bounded channel -/// - Workers continuously process without transaction return overhead +/// - Workers continuously process without transaction overhead /// /// 2. **On-Demand Execution**: /// - Lazy transaction creation for blinded node fetches -/// - Transactions returned to pool after use +/// - Transactions returned to pool after use for reuse /// -/// # External API +/// # Public Interface /// -/// The external API via `ProofTaskManagerHandle`: -/// - `queue_task(ProofTaskKind)` for submitting tasks -/// - `std::mpsc` message passing -/// - Consistent return types and error handling +/// The public interface through `ProofTaskManagerHandle` allows external callers to: +/// - Submit tasks via `queue_task(ProofTaskKind)` +/// - Use standard `std::mpsc` message passing +/// - Receive consistent return types and error handling #[derive(Debug)] pub struct ProofTaskManager { /// Sender for storage proof tasks to worker pool. 
From 29d48d4e1ecf9a835e59f43a737dd3bb3f139ead Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:36:02 +0000 Subject: [PATCH 08/52] refactor(config): remove storage proof worker configuration - Eliminated the `storage_proof_workers` field and related constants from `TreeConfig`. - Updated the default implementation and related methods to reflect the removal, streamlining the configuration structure. --- crates/engine/primitives/src/config.rs | 47 ------------------- .../tree/src/tree/payload_processor/mod.rs | 5 +- 2 files changed, 4 insertions(+), 48 deletions(-) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index 10d93676f5e..e5f58523d03 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -20,12 +20,6 @@ pub const DEFAULT_RESERVED_CPU_CORES: usize = 1; /// Default maximum concurrency for prewarm task. pub const DEFAULT_PREWARM_MAX_CONCURRENCY: usize = 16; -/// Maximum number of storage proof workers -const MAX_STORAGE_PROOF_WORKERS: usize = 12; - -/// Default ratio of storage proof workers to `max_proof_task_concurrency` -const DEFAULT_STORAGE_PROOF_WORKER_RATIO: f32 = 0.5; - const DEFAULT_BLOCK_BUFFER_LIMIT: u32 = 256; const DEFAULT_MAX_INVALID_HEADER_CACHE_LENGTH: u32 = 256; const DEFAULT_MAX_EXECUTE_BLOCK_BATCH_SIZE: usize = 4; @@ -115,9 +109,6 @@ pub struct TreeConfig { prewarm_max_concurrency: usize, /// Whether to unwind canonical header to ancestor during forkchoice updates. allow_unwind_canonical_header: bool, - /// Number of dedicated storage proof workers. - /// If None, defaults to half of `max_proof_task_concurrency`. 
- storage_proof_workers: Option, } impl Default for TreeConfig { @@ -144,7 +135,6 @@ impl Default for TreeConfig { always_process_payload_attributes_on_canonical_head: false, prewarm_max_concurrency: DEFAULT_PREWARM_MAX_CONCURRENCY, allow_unwind_canonical_header: false, - storage_proof_workers: None, } } } @@ -174,7 +164,6 @@ impl TreeConfig { always_process_payload_attributes_on_canonical_head: bool, prewarm_max_concurrency: usize, allow_unwind_canonical_header: bool, - storage_proof_workers: Option, ) -> Self { Self { persistence_threshold, @@ -198,7 +187,6 @@ impl TreeConfig { always_process_payload_attributes_on_canonical_head, prewarm_max_concurrency, allow_unwind_canonical_header, - storage_proof_workers, } } @@ -464,39 +452,4 @@ impl TreeConfig { pub const fn prewarm_max_concurrency(&self) -> usize { self.prewarm_max_concurrency } - - /// Get the number of storage proof workers. - /// - /// Defaults to half of `max_proof_task_concurrency`, clamped to valid range and leaving at - /// least one slot for on-demand work. - pub fn storage_proof_workers(&self) -> usize { - let max_allowed = self.max_proof_task_concurrency.saturating_sub(1) as usize; - if max_allowed == 0 { - return 0; - } - - self.storage_proof_workers.unwrap_or_else(|| { - let derived = (self.max_proof_task_concurrency as f32 * - DEFAULT_STORAGE_PROOF_WORKER_RATIO) as usize; - let capped = derived.min(MAX_STORAGE_PROOF_WORKERS); - - capped.clamp(1, max_allowed) - }) - } - - /// Set the number of storage proof workers explicitly. - /// - /// Value is clamped to the remaining concurrency budget (leaving one on-demand slot). 
- pub const fn with_storage_proof_workers(mut self, workers: usize) -> Self { - let max_allowed = self.max_proof_task_concurrency.saturating_sub(1) as usize; - let clamped = if workers > MAX_STORAGE_PROOF_WORKERS { - MAX_STORAGE_PROOF_WORKERS - } else if workers > max_allowed { - max_allowed - } else { - workers - }; - self.storage_proof_workers = Some(clamped); - self - } } diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 8aa1f0b4bfe..37785d30c2e 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -196,7 +196,10 @@ where state_root_config.prefix_sets.clone(), ); let max_proof_task_concurrency = config.max_proof_task_concurrency() as usize; - let storage_worker_count = config.storage_proof_workers(); + // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and blinded nodes) + let storage_worker_count = (max_proof_task_concurrency / 2) + .max(1) + .min(max_proof_task_concurrency.saturating_sub(1)); let proof_task = ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), From 3fb97c6977bb672f519a25355ab9e2e40edb045b Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 07:53:38 +0000 Subject: [PATCH 09/52] refactor(proof_task): enhance comments and adjust queue capacity logic - Improved comments in `ProofTaskManager` and related functions for better clarity on task management and processing. - Updated queue capacity calculation to use 4x buffering, reducing fallback to slower on-demand execution during burst loads. - Removed redundant variable assignments to streamline the code. 
--- .../tree/src/tree/payload_processor/mod.rs | 3 +- crates/trie/parallel/src/proof_task.rs | 47 ++++--------------- 2 files changed, 10 insertions(+), 40 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 37785d30c2e..bf17d3a1164 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -196,7 +196,8 @@ where state_root_config.prefix_sets.clone(), ); let max_proof_task_concurrency = config.max_proof_task_concurrency() as usize; - // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and blinded nodes) + // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and + // blinded nodes) let storage_worker_count = (max_proof_task_concurrency / 2) .max(1) .min(max_proof_task_concurrency.saturating_sub(1)); diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index b8f117d824e..384951d621f 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -88,8 +88,6 @@ struct StorageProofJob { #[derive(Debug)] pub struct ProofTaskManager { /// Sender for storage proof tasks to worker pool. - /// - /// Queue capacity = `storage_worker_count` * 2 (for 2x buffering) storage_work_tx: CrossbeamSender, /// Number of storage workers successfully spawned. 
@@ -177,19 +175,16 @@ fn storage_worker_loop( ); let mut proofs_processed = 0u64; - let start_time = Instant::now(); while let Ok(StorageProofJob { input, result_sender }) = work_rx.recv() { let hashed_address = input.hashed_address; - let prefix_set_len = input.prefix_set.len(); - let target_slots_len = input.target_slots.len(); trace!( target: "trie::proof_task", worker_id, hashed_address = ?hashed_address, - prefix_set_len, - target_slots = target_slots_len, + prefix_set_len = input.prefix_set.len(), + target_slots = input.target_slots.len(), "Processing storage proof" ); @@ -214,26 +209,16 @@ fn storage_worker_loop( worker_id, hashed_address = ?hashed_address, proof_time_us = proof_elapsed.as_micros(), - prefix_set_len, - target_slots = target_slots_len, total_processed = proofs_processed, "Storage proof completed" ); } // Channel closed - graceful shutdown - let total_elapsed = start_time.elapsed(); - tracing::info!( target: "trie::proof_task", worker_id, proofs_processed, - uptime_secs = total_elapsed.as_secs(), - avg_proof_time_ms = if proofs_processed > 0 { - total_elapsed.as_millis() / proofs_processed as u128 - } else { - 0 - }, "Storage proof worker shutting down" ); } @@ -270,7 +255,9 @@ where ); } - let queue_capacity = planned_workers.saturating_mul(2).max(1); + // Use 4x buffering to prevent queue saturation under burst load. + // Deeper queue reduces fallback to slower on-demand execution when workers are busy. + let queue_capacity = planned_workers.saturating_mul(4).max(1); let (storage_work_tx, storage_work_rx) = bounded::(queue_capacity); tracing::info!( @@ -328,26 +315,11 @@ where spawned = spawned_workers, "Spawned fewer storage workers than requested" ); - } else { - tracing::info!( - target: "trie::proof_task", - spawned_workers, - queue_capacity, - "Storage worker pool initialized successfully" - ); } // Allocate remaining capacity to on-demand pool. 
let max_on_demand_txs = max_concurrency.saturating_sub(spawned_workers); - tracing::debug!( - target: "trie::proof_task", - max_on_demand_txs, - storage_workers = spawned_workers, - total_capacity = max_concurrency, - "Configured on-demand transaction pool for blinded nodes" - ); - Self { storage_work_tx, storage_worker_count: spawned_workers, @@ -569,6 +541,7 @@ impl ProofTaskTx where Tx: DbTx, { + #[inline] fn create_factories(&self) -> ProofFactories<'_, Tx> { let trie_cursor_factory = InMemoryTrieCursorFactory::new( DatabaseTrieCursorFactory::new(&self.tx), @@ -588,6 +561,7 @@ where /// Borrows self immutably to allow transaction reuse across multiple calls. /// Used by storage workers in the worker pool to avoid transaction creation /// overhead on each proof computation. + #[inline] fn compute_storage_proof(&self, input: StorageProofInput) -> StorageProofResult { let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); @@ -599,8 +573,6 @@ where with_branch_node_masks, multi_added_removed_keys, } = input; - let prefix_set_len = prefix_set.len(); - let target_slots_len = target_slots.len(); // Get or create added/removed keys context let multi_added_removed_keys = @@ -611,14 +583,13 @@ where target: "trie::proof_task", "Storage proof calculation", hashed_address = ?hashed_address, - // Worker ID embedded in ProofTaskTx for trace correlation worker_id = self.id, ); let _guard = span.enter(); let proof_start = Instant::now(); - // Compute raw storage multiproof (identical to original) + // Compute raw storage multiproof let raw_proof_result = StorageProof::new_hashed(trie_cursor_factory, hashed_cursor_factory, hashed_address) .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) @@ -640,8 +611,6 @@ where trace!( target: "trie::proof_task", hashed_address = ?hashed_address, - prefix_set_len, - target_slots = target_slots_len, proof_time_us = proof_start.elapsed().as_micros(), worker_id = self.id, "Completed storage proof 
calculation" From 5779b869e98407bf900e00feaf604c9f13db0f36 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 08:24:23 +0000 Subject: [PATCH 10/52] disable max concurrency --- crates/engine/primitives/src/config.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index e5f58523d03..f622f32c893 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -165,6 +165,7 @@ impl TreeConfig { prewarm_max_concurrency: usize, allow_unwind_canonical_header: bool, ) -> Self { + assert!(max_proof_task_concurrency > 0, "max_proof_task_concurrency must be at least 1"); Self { persistence_threshold, memory_block_buffer_target, @@ -394,6 +395,7 @@ impl TreeConfig { mut self, max_proof_task_concurrency: u64, ) -> Self { + assert!(max_proof_task_concurrency > 0, "max_proof_task_concurrency must be at least 1"); self.max_proof_task_concurrency = max_proof_task_concurrency; self } From 0e33837041f784cc9b5ec693f79801c89f625f56 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 09:02:34 +0000 Subject: [PATCH 11/52] nits --- crates/trie/parallel/src/proof_task.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 384951d621f..6060d6aca1a 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -96,8 +96,6 @@ pub struct ProofTaskManager { storage_worker_count: usize, /// Maximum number of on-demand transactions for blinded node fetches. - /// - /// Calculated as: `max_concurrency` - `storage_worker_count` max_on_demand_txs: usize, /// On-demand transaction pool for blinded node fetches. @@ -1001,7 +999,7 @@ mod tests { /// Ensures the storage worker pool plus on-demand pool never exceed the requested concurrency /// when the storage worker count saturates the budget. 
#[test] - fn proof_task_manager_respects_concurrency_budget() { + fn proof_task_manager_within_concurrency_limit() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); runtime.block_on(async { let handle = tokio::runtime::Handle::current(); From 3bcbc71356c27667b8b9e3f10d9c22f7213a90c9 Mon Sep 17 00:00:00 2001 From: YK Date: Tue, 7 Oct 2025 18:39:50 +0800 Subject: [PATCH 12/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Brian Picciano --- crates/trie/parallel/src/proof_task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 6060d6aca1a..b03fcbc71ff 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -213,7 +213,7 @@ fn storage_worker_loop( } // Channel closed - graceful shutdown - tracing::info!( + tracing::debug!( target: "trie::proof_task", worker_id, proofs_processed, From 4a67076900d7a98c8e53925018750c730cb4d711 Mon Sep 17 00:00:00 2001 From: YK Date: Tue, 7 Oct 2025 18:39:58 +0800 Subject: [PATCH 13/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Brian Picciano --- crates/trie/parallel/src/proof_task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index b03fcbc71ff..e9fdccaba9a 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -482,7 +482,7 @@ where // Drop storage_work_tx to signal workers to shut down drop(self.storage_work_tx); - tracing::info!( + tracing::debug!( target: "trie::proof_task", storage_worker_count = self.storage_worker_count, "Shutting down proof task manager, signaling workers to terminate" From b2d5bcc7a7a36e9987533159e5fc5e096518aa77 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 10:48:12 +0000 Subject: [PATCH 14/52] using unbounded queue --- 
crates/trie/parallel/src/proof_task.rs | 36 ++++++++------------------ 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index e9fdccaba9a..88cbcda8cdd 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -10,7 +10,7 @@ use crate::root::ParallelStateRootError; use alloy_primitives::{map::B256Set, B256}; -use crossbeam_channel::{bounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender}; +use crossbeam_channel::{unbounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender}; use reth_db_api::transaction::DbTx; use reth_execution_errors::SparseTrieError; use reth_provider::{ @@ -72,8 +72,9 @@ struct StorageProofJob { /// /// 1. **Storage Worker Pool**: /// - Pre-spawned workers with dedicated long-lived transactions -/// - Tasks queued via crossbeam bounded channel +/// - Tasks queued via crossbeam unbounded channel /// - Workers continuously process without transaction overhead +/// - Unbounded queue ensures all storage proofs benefit from transaction reuse /// /// 2. **On-Demand Execution**: /// - Lazy transaction creation for blinded node fetches @@ -253,17 +254,15 @@ where ); } - // Use 4x buffering to prevent queue saturation under burst load. - // Deeper queue reduces fallback to slower on-demand execution when workers are busy. - let queue_capacity = planned_workers.saturating_mul(4).max(1); - let (storage_work_tx, storage_work_rx) = bounded::(queue_capacity); + // Use unbounded channel to ensure all storage proofs are queued to workers. + // This maintains transaction reuse benefits and avoids fallback to on-demand execution. 
+ let (storage_work_tx, storage_work_rx) = unbounded::(); tracing::info!( target: "trie::proof_task", storage_worker_count = planned_workers, - queue_capacity, max_concurrency, - "Initializing storage proof worker pool" + "Initializing storage proof worker pool with unbounded queue" ); let mut spawned_workers = 0; @@ -404,8 +403,8 @@ where /// /// # Task Routing /// - /// - **Storage Proofs**: Routed to pre-spawned worker pool via bounded channel. Falls back to - /// on-demand spawn if channel is full or disconnected. + /// - **Storage Proofs**: Routed to pre-spawned worker pool via unbounded channel. Only falls + /// back to on-demand if workers are disconnected (e.g., all workers panicked). /// - **Blinded Nodes**: Queued for on-demand execution. /// /// # Shutdown @@ -421,7 +420,7 @@ where ProofTaskKind::StorageProof(input, sender) => { match self .storage_work_tx - .try_send(StorageProofJob { input, result_sender: sender }) + .send(StorageProofJob { input, result_sender: sender }) { Ok(_) => { tracing::trace!( @@ -429,20 +428,7 @@ where "Storage proof dispatched to worker pool" ); } - Err(crossbeam_channel::TrySendError::Full(job)) => { - tracing::debug!( - target: "trie::proof_task", - "Worker pool queue full, spawning on-demand" - ); - - self.on_demand_queue.push_back( - ProofTaskKind::StorageProof( - job.input, - job.result_sender, - ), - ); - } - Err(crossbeam_channel::TrySendError::Disconnected(job)) => { + Err(crossbeam_channel::SendError(job)) => { tracing::warn!( target: "trie::proof_task", storage_worker_count = self.storage_worker_count, From 8f4e3a1a24ad221c96d9d74e8e28e70c3c92e7cb Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 11:53:01 +0000 Subject: [PATCH 15/52] rm comment --- crates/engine/tree/src/tree/payload_processor/multiproof.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/engine/tree/src/tree/payload_processor/multiproof.rs b/crates/engine/tree/src/tree/payload_processor/multiproof.rs index 
e435e914622..1f7acbf4d75 100644 --- a/crates/engine/tree/src/tree/payload_processor/multiproof.rs +++ b/crates/engine/tree/src/tree/payload_processor/multiproof.rs @@ -1236,7 +1236,7 @@ mod tests { config.consistent_view.clone(), task_ctx, 1, - 1, // storage_worker_count: 1 for tests + 1, ); let channel = channel(); From b4bf19399484ce1a5c85b456656af2bc2c7c595a Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 11:58:55 +0000 Subject: [PATCH 16/52] refactor(proof_task): optimize storage proof computation by reusing cursor factories - Introduced pre-created cursor factories in `storage_worker_loop` to reduce overhead during proof computation. - Updated `compute_storage_proof` to accept cursor factories as parameters, enhancing efficiency and clarity. - Improved logging to provide better insights into proof task calculations. --- crates/trie/parallel/src/proof_task.rs | 106 +++++++++++-------------- 1 file changed, 47 insertions(+), 59 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 88cbcda8cdd..f60732cfbee 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -173,6 +173,9 @@ fn storage_worker_loop( "Storage proof worker started" ); + // Create factories once at worker startup to avoid recreation overhead. + let (trie_cursor_factory, hashed_cursor_factory) = proof_tx.create_factories(); + let mut proofs_processed = 0u64; while let Ok(StorageProofJob { input, result_sender }) = work_rx.recv() { @@ -188,7 +191,8 @@ fn storage_worker_loop( ); let proof_start = Instant::now(); - let result = proof_tx.compute_storage_proof(input); + let result = + proof_tx.compute_storage_proof(input, &trie_cursor_factory, &hashed_cursor_factory); let proof_elapsed = proof_start.elapsed(); proofs_processed += 1; @@ -540,15 +544,24 @@ where (trie_cursor_factory, hashed_cursor_factory) } - /// Compute storage proof without consuming self. 
+ /// Compute storage proof with pre-created factories. /// - /// Borrows self immutably to allow transaction reuse across multiple calls. - /// Used by storage workers in the worker pool to avoid transaction creation + /// Accepts cursor factories as parameters to allow reuse across multiple proofs. + /// Used by storage workers in the worker pool to avoid factory recreation /// overhead on each proof computation. #[inline] - fn compute_storage_proof(&self, input: StorageProofInput) -> StorageProofResult { - let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); - + fn compute_storage_proof( + &self, + input: StorageProofInput, + trie_cursor_factory: &InMemoryTrieCursorFactory< + DatabaseTrieCursorFactory<&Tx>, + &TrieUpdatesSorted, + >, + hashed_cursor_factory: &HashedPostStateCursorFactory< + DatabaseHashedCursorFactory<&Tx>, + &HashedPostStateSorted, + >, + ) -> StorageProofResult { // Consume the input so we can move large collections (e.g. target slots) without cloning. 
let StorageProofInput { hashed_address, @@ -574,13 +587,16 @@ where let proof_start = Instant::now(); // Compute raw storage multiproof - let raw_proof_result = - StorageProof::new_hashed(trie_cursor_factory, hashed_cursor_factory, hashed_address) - .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) - .with_branch_node_masks(with_branch_node_masks) - .with_added_removed_keys(added_removed_keys) - .storage_multiproof(target_slots) - .map_err(|e| ParallelStateRootError::Other(e.to_string())); + let raw_proof_result = StorageProof::new_hashed( + trie_cursor_factory.clone(), + hashed_cursor_factory.clone(), + hashed_address, + ) + .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) + .with_branch_node_masks(with_branch_node_masks) + .with_added_removed_keys(added_removed_keys) + .storage_multiproof(target_slots) + .map_err(|e| ParallelStateRootError::Other(e.to_string())); // Decode proof into DecodedStorageMultiProof let decoded_result = raw_proof_result.and_then(|raw_proof| { @@ -620,67 +636,39 @@ where ) { trace!( target: "trie::proof_task", - hashed_address=?input.hashed_address, + hashed_address = ?input.hashed_address, "Starting storage proof task calculation" ); - let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); - let multi_added_removed_keys = input - .multi_added_removed_keys - .unwrap_or_else(|| Arc::new(MultiAddedRemovedKeys::new())); - let added_removed_keys = multi_added_removed_keys.get_storage(&input.hashed_address); - - let span = tracing::trace_span!( - target: "trie::proof_task", - "Storage proof calculation", - hashed_address=?input.hashed_address, - // Add a unique id because we often have parallel storage proof calculations for the - // same hashed address, and we want to differentiate them during trace analysis. 
- span_id=self.id, - ); - let span_guard = span.enter(); - + let hashed_address = input.hashed_address; + let prefix_set_len = input.prefix_set.len(); let target_slots_len = input.target_slots.len(); let proof_start = Instant::now(); - let raw_proof_result = StorageProof::new_hashed( - trie_cursor_factory, - hashed_cursor_factory, - input.hashed_address, - ) - .with_prefix_set_mut(PrefixSetMut::from(input.prefix_set.iter().copied())) - .with_branch_node_masks(input.with_branch_node_masks) - .with_added_removed_keys(added_removed_keys) - .storage_multiproof(input.target_slots) - .map_err(|e| ParallelStateRootError::Other(e.to_string())); - - drop(span_guard); - - let decoded_result = raw_proof_result.and_then(|raw_proof| { - raw_proof.try_into().map_err(|e: alloy_rlp::Error| { - ParallelStateRootError::Other(format!( - "Failed to decode storage proof for {}: {}", - input.hashed_address, e - )) - }) - }); - + // Create factories + let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); + let proof_result = + self.compute_storage_proof(input, &trie_cursor_factory, &hashed_cursor_factory); + let proof_time = proof_start.elapsed(); + let success = proof_result.is_ok(); trace!( target: "trie::proof_task", - hashed_address=?input.hashed_address, - prefix_set = ?input.prefix_set.len(), + hashed_address = ?hashed_address, + prefix_set = ?prefix_set_len, target_slots = ?target_slots_len, - proof_time = ?proof_start.elapsed(), + proof_time = ?proof_time, + worker_id = self.id, + success, "Completed storage proof task calculation" ); // send the result back - if let Err(error) = result_sender.send(decoded_result) { + if let Err(error) = result_sender.send(proof_result) { debug!( target: "trie::proof_task", - hashed_address = ?input.hashed_address, + hashed_address = ?hashed_address, ?error, - task_time = ?proof_start.elapsed(), + task_time = ?proof_time, "Storage proof receiver is dropped, discarding the result" ); } From 
6282d2ed87967a2525e66b030dd34faec9db4ea3 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:39:10 +0000 Subject: [PATCH 17/52] propagate error up --- crates/trie/parallel/src/proof_task.rs | 60 ++++++++++---------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index f60732cfbee..58669642fac 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -271,15 +271,26 @@ where let mut spawned_workers = 0; for worker_id in 0..planned_workers { - match view.provider_ro() { - Ok(provider_ro) => { + let provider_ro = match view.provider_ro() { + Ok(provider_ro) => provider_ro, + Err(err) => { + tracing::error!( + target: "trie::proof_task", + worker_id, + ?err, + requested = planned_workers, + spawned_workers, + "Failed to create transaction for storage worker, falling back to on-demand execution" + ); + return Err(err); + } + }; + let tx = provider_ro.into_tx(); let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); let work_rx = storage_work_rx.clone(); - executor.spawn_blocking(move || { - storage_worker_loop(proof_task_tx, work_rx, worker_id) - }); + executor.spawn_blocking(move || storage_worker_loop(proof_task_tx, work_rx, worker_id)); spawned_workers += 1; @@ -290,44 +301,17 @@ where "Storage worker spawned successfully" ); } - Err(err) => { - tracing::warn!( - target: "trie::proof_task", - worker_id, - ?err, - requested = planned_workers, - spawned_workers, - "Failed to create transaction for storage worker, continuing with fewer workers" - ); - } - } - } - - if spawned_workers == 0 { - tracing::error!( - target: "trie::proof_task", - requested = planned_workers, - "Failed to spawn any storage workers - all work will execute on-demand" - ); - } else if spawned_workers < planned_workers { - tracing::warn!( - target: "trie::proof_task", - requested = planned_workers, - spawned = spawned_workers,
- "Spawned fewer storage workers than requested" - ); - } - // Allocate remaining capacity to on-demand pool. - let max_on_demand_txs = max_concurrency.saturating_sub(spawned_workers); + // Allocate remaining capacity to on-demand pool for account trie operations. + let remaining_concurrency = max_concurrency.saturating_sub(spawned_workers); Self { storage_work_tx, storage_worker_count: spawned_workers, - max_on_demand_txs, - on_demand_txs: Vec::with_capacity(max_on_demand_txs), - on_demand_tx_count: 0, - on_demand_queue: VecDeque::new(), + max_concurrency: remaining_concurrency, + total_transactions: 0, + pending_tasks: VecDeque::new(), + proof_task_txs: Vec::with_capacity(remaining_concurrency), view, task_ctx, executor, From 838dc6700e7938cbb2330c6f393a66521cee2b4f Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:43:21 +0000 Subject: [PATCH 18/52] reduce scope of pr - exclude all accs - not change the logic for pending_tasks and proof_tasks_txs (on-demand proofs) and just continue using it for the BlindedAccountNode requests, but start using dedicated storage workers for StorageProof and BlindedStorageNode requests --- .../tree/src/tree/payload_processor/mod.rs | 26 ++- .../src/tree/payload_processor/multiproof.rs | 3 +- .../engine/tree/src/tree/payload_validator.rs | 30 ++- crates/trie/parallel/src/proof.rs | 3 +- crates/trie/parallel/src/proof_task.rs | 215 +++++++----------- 5 files changed, 130 insertions(+), 147 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index bf17d3a1164..18a1978a131 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -45,7 +45,7 @@ use std::sync::{ mpsc::{self, channel, Sender}, Arc, }; -use tracing::{debug, instrument}; +use tracing::{debug, instrument, warn}; mod configured_sparse_trie; pub mod executor; @@ -201,13 +201,28 @@ where let storage_worker_count 
= (max_proof_task_concurrency / 2) .max(1) .min(max_proof_task_concurrency.saturating_sub(1)); - let proof_task = ProofTaskManager::new( + let proof_task = match ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), task_ctx, max_proof_task_concurrency, storage_worker_count, - ); + ) { + Ok(proof_task) => proof_task, + Err(error) => { + // If we cannot bootstrap the proof task manager, continue with cache prewarming + // only; the caller will detect the missing state root channel and fall back to the + // parallel state root algorithm. + tracing::warn!( + target: "engine::tree", + ?error, + max_concurrency = max_proof_task_concurrency, + requested_workers = storage_worker_count, + "Failed to initialize proof task manager, falling back to cache-only mode" + ); + return self.spawn_cache_exclusive(env, transactions, provider_builder); + } + }; // We set it to half of the proof task concurrency, because often for each multiproof we // spawn one Tokio task for the account proof, and one Tokio task for the storage proof. @@ -472,6 +487,11 @@ impl PayloadHandle { .map_err(|_| ParallelStateRootError::Other("sparse trie task dropped".to_string()))? } + /// Returns `true` if the handle is connected to a background state root task. + pub fn supports_state_root(&self) -> bool { + self.state_root.is_some() + } + /// Returns a state hook to be used to send state updates to this task. /// /// If a multiproof task is spawned the hook will notify it about new states. 
diff --git a/crates/engine/tree/src/tree/payload_processor/multiproof.rs b/crates/engine/tree/src/tree/payload_processor/multiproof.rs index 1f7acbf4d75..b36fcae4f46 100644 --- a/crates/engine/tree/src/tree/payload_processor/multiproof.rs +++ b/crates/engine/tree/src/tree/payload_processor/multiproof.rs @@ -1237,7 +1237,8 @@ mod tests { task_ctx, 1, 1, - ); + ) + .unwrap(); let channel = channel(); MultiProofTask::new(config, executor, proof_task.handle(), channel.0, 1, None) diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index cd2c37d1e91..4842f67cd71 100644 --- a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -877,17 +877,25 @@ where // too expensive because it requires walking all paths in every proof. let spawn_start = Instant::now(); let (handle, strategy) = if trie_input.prefix_sets.is_empty() { - ( - self.payload_processor.spawn( - env, - txs, - provider_builder, - consistent_view, - trie_input, - &self.config, - ), - StateRootStrategy::StateRootTask, - ) + let handle = self.payload_processor.spawn( + env, + txs, + provider_builder, + consistent_view, + trie_input, + &self.config, + ); + let strategy = if handle.supports_state_root() { + StateRootStrategy::StateRootTask + } else { + debug!( + target: "engine::tree", + block=?block_num_hash, + "Proof task initialization failed, falling back to parallel state root" + ); + StateRootStrategy::Parallel + }; + (handle, strategy) // if prefix sets are not empty, we spawn a task that exclusively handles cache // prewarming for transaction execution } else { diff --git a/crates/trie/parallel/src/proof.rs b/crates/trie/parallel/src/proof.rs index 4c9a3a57cf8..4a2738fd38e 100644 --- a/crates/trie/parallel/src/proof.rs +++ b/crates/trie/parallel/src/proof.rs @@ -448,7 +448,8 @@ mod tests { let task_ctx = ProofTaskCtx::new(Default::default(), Default::default(), Default::default()); let proof_task 
= - ProofTaskManager::new(rt.handle().clone(), consistent_view.clone(), task_ctx, 1, 1); + ProofTaskManager::new(rt.handle().clone(), consistent_view.clone(), task_ctx, 1, 1) + .unwrap(); let proof_task_handle = proof_task.handle(); // keep the join handle around to make sure it does not return any errors diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 58669642fac..1073fb16ea1 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -41,7 +41,7 @@ use std::{ time::Instant, }; use tokio::runtime::Handle; -use tracing::{debug, trace}; +use tracing::trace; #[cfg(feature = "metrics")] use crate::proof_task_metrics::ProofTaskMetrics; @@ -70,14 +70,15 @@ struct StorageProofJob { /// /// This manager handles two distinct execution paths: /// -/// 1. **Storage Worker Pool**: +/// 1. **Storage Worker Pool** (for storage trie operations): /// - Pre-spawned workers with dedicated long-lived transactions +/// - Handles `StorageProof` and `BlindedStorageNode` requests /// - Tasks queued via crossbeam unbounded channel /// - Workers continuously process without transaction overhead /// - Unbounded queue ensures all storage proofs benefit from transaction reuse /// -/// 2. **On-Demand Execution**: -/// - Lazy transaction creation for blinded node fetches +/// 2. **On-Demand Execution** (for account trie operations): +/// - Lazy transaction creation for `BlindedAccountNode` requests /// - Transactions returned to pool after use for reuse /// /// # Public Interface @@ -93,23 +94,21 @@ pub struct ProofTaskManager { /// Number of storage workers successfully spawned. /// - /// May be less than requested if transaction creation fails. + /// May be less than requested if concurrency limits reduce the worker budget. storage_worker_count: usize, - /// Maximum number of on-demand transactions for blinded node fetches. 
- max_on_demand_txs: usize, + /// Max number of database transactions to create for on-demand account trie operations. + max_concurrency: usize, - /// On-demand transaction pool for blinded node fetches. - on_demand_txs: Vec>>, + /// Number of database transactions created for on-demand operations. + total_transactions: usize, - /// Total on-demand transactions created (for ID assignment). - on_demand_tx_count: usize, + /// Proof tasks pending execution (account trie operations only). + pending_tasks: VecDeque, - /// Queue of tasks waiting for on-demand transaction assignment. - /// - /// Holds `ProofTaskKind` for both blinded node fetches and storage proof - /// fallbacks (when worker pool is full/unavailable). - on_demand_queue: VecDeque, + /// The proof task transactions, containing owned cursor factories that are reused for proof + /// calculation (account trie operations only). + proof_task_txs: Vec>>, /// Consistent view provider used for creating transactions on-demand. view: ConsistentDbView, @@ -235,18 +234,21 @@ where /// The `max_concurrency` budget is split between pre-spawned storage workers and an /// on-demand pool. At least one slot is always reserved for on-demand, so the actual /// number of workers spawned is `min(storage_worker_count, max_concurrency - 1)`. - /// If workers fail to spawn, the system continues with fewer workers. + /// Returns an error if the underlying provider fails to create the transactions required for + /// spawning workers. pub fn new( executor: Handle, view: ConsistentDbView, task_ctx: ProofTaskCtx, max_concurrency: usize, storage_worker_count: usize, - ) -> Self { + ) -> ProviderResult { let (tx_sender, proof_task_rx) = channel(); - let worker_budget = max_concurrency.saturating_sub(1); - let planned_workers = storage_worker_count.min(worker_budget); + // Calculate how many workers to spawn. 
+ // We use the full max_concurrency for workers if needed, since on-demand operations + // (BlindedAccountNode) are less frequent and can share the budget. + let planned_workers = storage_worker_count.min(max_concurrency); if planned_workers < storage_worker_count { tracing::debug!( @@ -286,26 +288,26 @@ where } }; - let tx = provider_ro.into_tx(); - let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); - let work_rx = storage_work_rx.clone(); + let tx = provider_ro.into_tx(); + let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); + let work_rx = storage_work_rx.clone(); executor.spawn_blocking(move || storage_worker_loop(proof_task_tx, work_rx, worker_id)); - spawned_workers += 1; + spawned_workers += 1; - tracing::debug!( - target: "trie::proof_task", - worker_id, - spawned_workers, - "Storage worker spawned successfully" - ); - } + tracing::debug!( + target: "trie::proof_task", + worker_id, + spawned_workers, + "Storage worker spawned successfully" + ); + } // Allocate remaining capacity to on-demand pool for account trie operations. let remaining_concurrency = max_concurrency.saturating_sub(spawned_workers); - Self { + Ok(Self { storage_work_tx, storage_worker_count: spawned_workers, max_concurrency: remaining_concurrency, @@ -321,7 +323,7 @@ where #[cfg(feature = "metrics")] metrics: ProofTaskMetrics::default(), - } + }) } /// Returns a handle for sending new proof tasks to the [`ProofTaskManager`]. @@ -336,22 +338,22 @@ where { /// Inserts the task into the pending tasks queue. pub fn queue_proof_task(&mut self, task: ProofTaskKind) { - self.on_demand_queue.push_back(task); + self.pending_tasks.push_back(task); } /// Gets either the next available transaction, or creates a new one if all are in use and the /// total number of transactions created is less than the max concurrency. 
pub fn get_or_create_tx(&mut self) -> ProviderResult>>> { - if let Some(proof_task_tx) = self.on_demand_txs.pop() { + if let Some(proof_task_tx) = self.proof_task_txs.pop() { return Ok(Some(proof_task_tx)); } // if we can create a new tx within our concurrency limits, create one on-demand - if self.on_demand_tx_count < self.max_on_demand_txs { + if self.total_transactions < self.max_concurrency { let provider_ro = self.view.provider_ro()?; let tx = provider_ro.into_tx(); - self.on_demand_tx_count += 1; - return Ok(Some(ProofTaskTx::new(tx, self.task_ctx.clone(), self.on_demand_tx_count))); + self.total_transactions += 1; + return Ok(Some(ProofTaskTx::new(tx, self.task_ctx.clone(), self.total_transactions))); } Ok(None) @@ -363,25 +365,26 @@ where /// This will return an error if a transaction must be created on-demand and the consistent view /// provider fails. pub fn try_spawn_next(&mut self) -> ProviderResult<()> { - let Some(task) = self.on_demand_queue.pop_front() else { return Ok(()) }; + let Some(task) = self.pending_tasks.pop_front() else { return Ok(()) }; let Some(proof_task_tx) = self.get_or_create_tx()? 
else { // if there are no txs available, requeue the proof task - self.on_demand_queue.push_front(task); + self.pending_tasks.push_front(task); return Ok(()) }; let tx_sender = self.tx_sender.clone(); self.executor.spawn_blocking(move || match task { - ProofTaskKind::StorageProof(input, sender) => { - proof_task_tx.storage_proof(input, sender, tx_sender); - } ProofTaskKind::BlindedAccountNode(path, sender) => { proof_task_tx.blinded_account_node(path, sender, tx_sender); } ProofTaskKind::BlindedStorageNode(account, path, sender) => { proof_task_tx.blinded_storage_node(account, path, sender, tx_sender); } + // StorageProof should never reach here as it's routed to worker pool + ProofTaskKind::StorageProof(_, _) => { + unreachable!("StorageProof should be routed to worker pool") + } }); Ok(()) @@ -391,9 +394,11 @@ where /// /// # Task Routing /// - /// - **Storage Proofs**: Routed to pre-spawned worker pool via unbounded channel. Only falls - /// back to on-demand if workers are disconnected (e.g., all workers panicked). - /// - **Blinded Nodes**: Queued for on-demand execution. + /// - **Storage Trie Operations** (`StorageProof` and `BlindedStorageNode`): Routed to + /// pre-spawned worker pool via unbounded channel. Only falls back to `pending_tasks` if + /// workers are disconnected (e.g., all workers panicked). + /// - **Account Trie Operations** (`BlindedAccountNode`): Queued for on-demand execution via + /// `pending_tasks`. 
/// /// # Shutdown /// @@ -420,37 +425,43 @@ where tracing::warn!( target: "trie::proof_task", storage_worker_count = self.storage_worker_count, - "Worker pool disconnected (no workers available), falling back to on-demand" + "Worker pool disconnected, cannot process storage proof" ); - self.on_demand_queue.push_back( - ProofTaskKind::StorageProof( - job.input, - job.result_sender, + // Send error back to caller + let _ = job.result_sender.send(Err( + ParallelStateRootError::Other( + "Storage proof worker pool unavailable".to_string(), ), - ); + )); } } } - ProofTaskKind::BlindedAccountNode(_, _) => { + ProofTaskKind::BlindedStorageNode(account, path, sender) => { + // Route storage trie operations to worker pool + // For now, queue to pending_tasks until we add worker pool support #[cfg(feature = "metrics")] { - self.metrics.account_nodes += 1; + self.metrics.storage_nodes += 1; } - self.queue_proof_task(task); + self.queue_proof_task(ProofTaskKind::BlindedStorageNode( + account, path, sender, + )); } - ProofTaskKind::BlindedStorageNode(_, _, _) => { + + ProofTaskKind::BlindedAccountNode(_, _) => { + // Route account trie operations to pending_tasks #[cfg(feature = "metrics")] { - self.metrics.storage_nodes += 1; + self.metrics.account_nodes += 1; } self.queue_proof_task(task); } }, ProofTaskMessage::Transaction(tx) => { - // Return transaction to on-demand pool - self.on_demand_txs.push(tx); + // Return transaction to pending_tasks pool + self.proof_task_txs.push(tx); } ProofTaskMessage::Terminate => { // Drop storage_work_tx to signal workers to shut down @@ -475,7 +486,7 @@ where Err(_) => return Ok(()), }; - // Try spawning on-demand tasks only (storage proofs handled by worker pool) + // Try spawning pending account trie tasks self.try_spawn_next()?; } } @@ -566,7 +577,7 @@ where hashed_address = ?hashed_address, worker_id = self.id, ); - let _guard = span.enter(); + let _span_guard = span.enter(); let proof_start = Instant::now(); @@ -603,64 +614,6 @@ where 
decoded_result } - /// Calculates a storage proof for the given hashed address, and desired prefix set. - /// - /// **ON-DEMAND VARIANT** - Consumes self, returns transaction to pool. - /// - /// This method is NO LONGER CALLED for storage proofs from the worker pool, - /// but is kept for: - /// 1. Backward compatibility with any direct callers - /// 2. Future use cases that need one-off storage proofs - /// 3. Tests that rely on the transaction return mechanism - fn storage_proof( - self, - input: StorageProofInput, - result_sender: Sender, - tx_sender: Sender>, - ) { - trace!( - target: "trie::proof_task", - hashed_address = ?input.hashed_address, - "Starting storage proof task calculation" - ); - - let hashed_address = input.hashed_address; - let prefix_set_len = input.prefix_set.len(); - let target_slots_len = input.target_slots.len(); - let proof_start = Instant::now(); - - // Create factories - let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); - let proof_result = - self.compute_storage_proof(input, &trie_cursor_factory, &hashed_cursor_factory); - let proof_time = proof_start.elapsed(); - let success = proof_result.is_ok(); - trace!( - target: "trie::proof_task", - hashed_address = ?hashed_address, - prefix_set = ?prefix_set_len, - target_slots = ?target_slots_len, - proof_time = ?proof_time, - worker_id = self.id, - success, - "Completed storage proof task calculation" - ); - - // send the result back - if let Err(error) = result_sender.send(proof_result) { - debug!( - target: "trie::proof_task", - hashed_address = ?hashed_address, - ?error, - task_time = ?proof_time, - "Storage proof receiver is dropped, discarding the result" - ); - } - - // send the tx back - let _ = tx_sender.send(ProofTaskMessage::Transaction(self)); - } - /// Retrieves blinded account node by path. 
fn blinded_account_node( self, @@ -954,8 +907,7 @@ mod tests { ) } - /// Ensures the storage worker pool plus on-demand pool never exceed the requested concurrency - /// when the storage worker count saturates the budget. + /// Ensures the storage workers are capped by max_concurrency. #[test] fn proof_task_manager_within_concurrency_limit() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); @@ -965,18 +917,18 @@ mod tests { let view = ConsistentDbView::new(factory, None); let ctx = test_ctx(); - let manager = ProofTaskManager::new(handle.clone(), view, ctx, 2, 2); - assert_eq!(manager.storage_worker_count, 1); - assert_eq!(manager.max_on_demand_txs, 1); - assert!(manager.storage_worker_count + manager.max_on_demand_txs <= 2); + let manager = ProofTaskManager::new(handle.clone(), view, ctx, 2, 2).unwrap(); + // With max_concurrency=2 and storage_worker_count=2, we get 2 workers + assert_eq!(manager.storage_worker_count, 2); + // No remaining concurrency for on-demand + assert_eq!(manager.max_concurrency, 0); drop(manager); task::yield_now().await; }); } - /// Ensures the manager falls back to on-demand transactions when the budget only allows a - /// single concurrent transaction. + /// Ensures the manager caps storage workers to max_concurrency when requested count is higher. 
#[test] fn proof_task_manager_handles_single_concurrency() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); @@ -986,10 +938,11 @@ mod tests { let view = ConsistentDbView::new(factory, None); let ctx = test_ctx(); - let manager = ProofTaskManager::new(handle.clone(), view, ctx, 1, 5); - assert_eq!(manager.storage_worker_count, 0); - assert_eq!(manager.max_on_demand_txs, 1); - assert!(manager.storage_worker_count + manager.max_on_demand_txs <= 1); + let manager = ProofTaskManager::new(handle.clone(), view, ctx, 1, 5).unwrap(); + // With max_concurrency=1 and storage_worker_count=5, we get 1 worker + assert_eq!(manager.storage_worker_count, 1); + // No remaining concurrency for on-demand + assert_eq!(manager.max_concurrency, 0); drop(manager); task::yield_now().await; From 58979459331dfcad657ce609a5e9e49a693bf235 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:46:01 +0000 Subject: [PATCH 19/52] fmt, clippy --- crates/engine/tree/src/tree/payload_processor/mod.rs | 2 +- crates/engine/tree/src/tree/payload_validator.rs | 4 ++++ crates/trie/parallel/src/proof_task.rs | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 18a1978a131..a2d445abb4b 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -488,7 +488,7 @@ impl PayloadHandle { } /// Returns `true` if the handle is connected to a background state root task. 
- pub fn supports_state_root(&self) -> bool { + pub const fn supports_state_root(&self) -> bool { self.state_root.is_some() } diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index 4842f67cd71..a8886b72a24 100644 --- a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -885,6 +885,10 @@ where trie_input, &self.config, ); + // The payload processor will silently downgrade to cache-only mode if the proof + // task manager fails to initialize (e.g. provider error). Detect that here and + // fall back to the legacy parallel state root computation so we still attempt + // to reuse the caching pipeline. let strategy = if handle.supports_state_root() { StateRootStrategy::StateRootTask } else { diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 1073fb16ea1..7ea84e6f759 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -907,7 +907,7 @@ mod tests { ) } - /// Ensures the storage workers are capped by max_concurrency. + /// Ensures the storage workers are capped by `max_concurrency`. #[test] fn proof_task_manager_within_concurrency_limit() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); @@ -928,7 +928,7 @@ mod tests { }); } - /// Ensures the manager caps storage workers to max_concurrency when requested count is higher. + /// Ensures the manager caps storage workers to `max_concurrency` when requested count is higher. 
#[test] fn proof_task_manager_handles_single_concurrency() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); From 6b5de7c8deeb3c1367bcfe810e802d484dc9aec6 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:48:16 +0000 Subject: [PATCH 20/52] fmt --- crates/trie/parallel/src/proof_task.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 7ea84e6f759..d5687f01a7e 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -928,7 +928,8 @@ mod tests { }); } - /// Ensures the manager caps storage workers to `max_concurrency` when requested count is higher. + /// Ensures the manager caps storage workers to `max_concurrency` when requested count is + /// higher. #[test] fn proof_task_manager_handles_single_concurrency() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); From 05e0eb8751e2c139a791eff68afb63cfb298ff9f Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:53:56 +0000 Subject: [PATCH 21/52] refactor(proof_task): consolidate blinded storage node with storage proof --- crates/trie/parallel/src/proof_task.rs | 303 +++++++++++++++---------- 1 file changed, 181 insertions(+), 122 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index d5687f01a7e..e772aad4aad 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -49,19 +49,28 @@ use crate::proof_task_metrics::ProofTaskMetrics; type StorageProofResult = Result; type TrieNodeProviderResult = Result, SparseTrieError>; -/// Internal message for storage proof workers. +/// Internal message for storage workers. /// -/// This is NOT exposed publicly. External callers still use `ProofTaskKind::StorageProof` -/// which is routed through the manager's `std::mpsc` channel. 
+/// This is NOT exposed publicly. External callers use `ProofTaskKind::StorageProof` or +/// `ProofTaskKind::BlindedStorageNode` which are routed through the manager's `std::mpsc` channel. #[derive(Debug)] -struct StorageProofJob { - /// Storage proof input parameters - input: StorageProofInput, - /// Channel to send result back to original caller - /// - /// This is the same `std::mpsc::Sender` that the external caller provided in - /// `ProofTaskKind::StorageProof(input`, sender). - result_sender: Sender, +enum StorageWorkerJob { + /// Storage proof computation request + StorageProof { + /// Storage proof input parameters + input: StorageProofInput, + /// Channel to send result back to original caller + result_sender: Sender, + }, + /// Blinded storage node retrieval request + BlindedStorageNode { + /// Target account + account: B256, + /// Path to the storage node + path: Nibbles, + /// Channel to send result back to original caller + result_sender: Sender, + }, } /// Manager for coordinating proof request execution across different task types. @@ -89,8 +98,8 @@ struct StorageProofJob { /// - Receive consistent return types and error handling #[derive(Debug)] pub struct ProofTaskManager { - /// Sender for storage proof tasks to worker pool. - storage_work_tx: CrossbeamSender, + /// Sender for storage worker jobs to worker pool. + storage_work_tx: CrossbeamSender, /// Number of storage workers successfully spawned. /// @@ -136,20 +145,20 @@ pub struct ProofTaskManager { metrics: ProofTaskMetrics, } -/// Worker loop for storage proof computation. +/// Worker loop for storage trie operations. /// /// # Lifecycle /// /// Each worker: -/// 1. Receives `StorageProofJob` from crossbeam bounded channel -/// 2. Computes proof using its dedicated long-lived transaction +/// 1. Receives `StorageWorkerJob` from crossbeam unbounded channel +/// 2. Computes result using its dedicated long-lived transaction /// 3. 
Sends result directly to original caller via `std::mpsc` /// 4. Repeats until channel closes (graceful shutdown) /// /// # Transaction Reuse /// -/// Reuses the same transaction across multiple proofs to avoid transaction -/// creation and cursor factory setup overhead. +/// Reuses the same transaction and cursor factories across multiple operations +/// to avoid transaction creation and cursor factory setup overhead. /// /// # Panic Safety /// @@ -161,7 +170,7 @@ pub struct ProofTaskManager { /// Worker shuts down when the crossbeam channel closes (all senders dropped). fn storage_worker_loop( proof_tx: ProofTaskTx, - work_rx: CrossbeamReceiver, + work_rx: CrossbeamReceiver, worker_id: usize, ) where Tx: DbTx, @@ -169,59 +178,113 @@ fn storage_worker_loop( tracing::debug!( target: "trie::proof_task", worker_id, - "Storage proof worker started" + "Storage worker started" ); // Create factories once at worker startup to avoid recreation overhead. let (trie_cursor_factory, hashed_cursor_factory) = proof_tx.create_factories(); - let mut proofs_processed = 0u64; + // Create blinded provider factory once for all blinded node requests + let blinded_provider_factory = ProofTrieNodeProviderFactory::new( + trie_cursor_factory.clone(), + hashed_cursor_factory.clone(), + proof_tx.task_ctx.prefix_sets.clone(), + ); - while let Ok(StorageProofJob { input, result_sender }) = work_rx.recv() { - let hashed_address = input.hashed_address; + let mut storage_proofs_processed = 0u64; + let mut storage_nodes_processed = 0u64; + + while let Ok(job) = work_rx.recv() { + match job { + StorageWorkerJob::StorageProof { input, result_sender } => { + let hashed_address = input.hashed_address; + + trace!( + target: "trie::proof_task", + worker_id, + hashed_address = ?hashed_address, + prefix_set_len = input.prefix_set.len(), + target_slots = input.target_slots.len(), + "Processing storage proof" + ); + + let proof_start = Instant::now(); + let result = proof_tx.compute_storage_proof( + input, 
+ &trie_cursor_factory, + &hashed_cursor_factory, + ); + + let proof_elapsed = proof_start.elapsed(); + storage_proofs_processed += 1; + + if result_sender.send(result).is_err() { + tracing::debug!( + target: "trie::proof_task", + worker_id, + hashed_address = ?hashed_address, + storage_proofs_processed, + "Storage proof receiver dropped, discarding result" + ); + } - trace!( - target: "trie::proof_task", - worker_id, - hashed_address = ?hashed_address, - prefix_set_len = input.prefix_set.len(), - target_slots = input.target_slots.len(), - "Processing storage proof" - ); + trace!( + target: "trie::proof_task", + worker_id, + hashed_address = ?hashed_address, + proof_time_us = proof_elapsed.as_micros(), + total_processed = storage_proofs_processed, + "Storage proof completed" + ); + } - let proof_start = Instant::now(); - let result = - proof_tx.compute_storage_proof(input, &trie_cursor_factory, &hashed_cursor_factory); + StorageWorkerJob::BlindedStorageNode { account, path, result_sender } => { + trace!( + target: "trie::proof_task", + worker_id, + ?account, + ?path, + "Processing blinded storage node" + ); - let proof_elapsed = proof_start.elapsed(); - proofs_processed += 1; + let start = Instant::now(); + let result = + blinded_provider_factory.storage_node_provider(account).trie_node(&path); + let elapsed = start.elapsed(); - if result_sender.send(result).is_err() { - tracing::debug!( - target: "trie::proof_task", - worker_id, - hashed_address = ?hashed_address, - proofs_processed, - "Storage proof receiver dropped, discarding result" - ); - } + storage_nodes_processed += 1; - trace!( - target: "trie::proof_task", - worker_id, - hashed_address = ?hashed_address, - proof_time_us = proof_elapsed.as_micros(), - total_processed = proofs_processed, - "Storage proof completed" - ); + if result_sender.send(result).is_err() { + tracing::debug!( + target: "trie::proof_task", + worker_id, + ?account, + ?path, + storage_nodes_processed, + "Blinded storage node receiver 
dropped, discarding result" + ); + } + + trace!( + target: "trie::proof_task", + worker_id, + ?account, + ?path, + elapsed_us = elapsed.as_micros(), + total_processed = storage_nodes_processed, + "Blinded storage node completed" + ); + } + } } // Channel closed - graceful shutdown tracing::debug!( target: "trie::proof_task", worker_id, - proofs_processed, - "Storage proof worker shutting down" + storage_proofs_processed, + storage_nodes_processed, + "Storage worker shutting down" ); } @@ -260,15 +323,15 @@ where ); } - // Use unbounded channel to ensure all storage proofs are queued to workers. + // Use unbounded channel to ensure all storage operations are queued to workers. // This maintains transaction reuse benefits and avoids fallback to on-demand execution. - let (storage_work_tx, storage_work_rx) = unbounded::(); + let (storage_work_tx, storage_work_rx) = unbounded::(); tracing::info!( target: "trie::proof_task", storage_worker_count = planned_workers, max_concurrency, - "Initializing storage proof worker pool with unbounded queue" + "Initializing storage worker pool with unbounded queue" ); let mut spawned_workers = 0; @@ -378,10 +441,10 @@ where ProofTaskKind::BlindedAccountNode(path, sender) => { proof_task_tx.blinded_account_node(path, sender, tx_sender); } - ProofTaskKind::BlindedStorageNode(account, path, sender) => { - proof_task_tx.blinded_storage_node(account, path, sender, tx_sender); + // Storage trie operations should never reach here as they're routed to worker pool + ProofTaskKind::BlindedStorageNode(_, _, _) => { + unreachable!("BlindedStorageNode should be routed to worker pool") } - // StorageProof should never reach here as it's routed to worker pool ProofTaskKind::StorageProof(_, _) => { unreachable!("StorageProof should be routed to worker pool") } @@ -411,10 +474,10 @@ where match message { ProofTaskMessage::QueueTask(task) => match task { ProofTaskKind::StorageProof(input, sender) => { - match self - .storage_work_tx - 
.send(StorageProofJob { input, result_sender: sender }) - { + match self.storage_work_tx.send(StorageWorkerJob::StorageProof { + input, + result_sender: sender, + }) { Ok(_) => { tracing::trace!( target: "trie::proof_task", @@ -429,25 +492,68 @@ where ); // Send error back to caller - let _ = job.result_sender.send(Err( - ParallelStateRootError::Other( - "Storage proof worker pool unavailable".to_string(), - ), - )); + if let StorageWorkerJob::StorageProof { + result_sender, + .. + } = job + { + let _ = result_sender.send(Err( + ParallelStateRootError::Other( + "Storage proof worker pool unavailable" + .to_string(), + ), + )); + } } } } ProofTaskKind::BlindedStorageNode(account, path, sender) => { - // Route storage trie operations to worker pool - // For now, queue to pending_tasks until we add worker pool support #[cfg(feature = "metrics")] { self.metrics.storage_nodes += 1; } - self.queue_proof_task(ProofTaskKind::BlindedStorageNode( - account, path, sender, - )); + + match self.storage_work_tx.send( + StorageWorkerJob::BlindedStorageNode { + account, + path, + result_sender: sender, + }, + ) { + Ok(_) => { + tracing::trace!( + target: "trie::proof_task", + ?account, + ?path, + "Blinded storage node dispatched to worker pool" + ); + } + Err(crossbeam_channel::SendError(job)) => { + tracing::warn!( + target: "trie::proof_task", + storage_worker_count = self.storage_worker_count, + ?account, + ?path, + "Worker pool disconnected, cannot process blinded storage node" + ); + + // Send error back to caller + if let StorageWorkerJob::BlindedStorageNode { + result_sender, + .. 
+ } = job + { + let _ = result_sender.send(Err(SparseTrieError::from( + Box::new(std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "Storage worker pool unavailable", + )) + as Box, + ))); + } + } + } } ProofTaskKind::BlindedAccountNode(_, _) => { @@ -656,53 +762,6 @@ where // send the tx back let _ = tx_sender.send(ProofTaskMessage::Transaction(self)); } - - /// Retrieves blinded storage node of the given account by path. - fn blinded_storage_node( - self, - account: B256, - path: Nibbles, - result_sender: Sender, - tx_sender: Sender>, - ) { - trace!( - target: "trie::proof_task", - ?account, - ?path, - "Starting blinded storage node retrieval" - ); - - let (trie_cursor_factory, hashed_cursor_factory) = self.create_factories(); - - let blinded_provider_factory = ProofTrieNodeProviderFactory::new( - trie_cursor_factory, - hashed_cursor_factory, - self.task_ctx.prefix_sets.clone(), - ); - - let start = Instant::now(); - let result = blinded_provider_factory.storage_node_provider(account).trie_node(&path); - trace!( - target: "trie::proof_task", - ?account, - ?path, - elapsed = ?start.elapsed(), - "Completed blinded storage node retrieval" - ); - - if let Err(error) = result_sender.send(result) { - tracing::error!( - target: "trie::proof_task", - ?account, - ?path, - ?error, - "Failed to send blinded storage node result" - ); - } - - // send the tx back - let _ = tx_sender.send(ProofTaskMessage::Transaction(self)); - } } /// This represents an input for a storage proof. 
From 4829de9d28811e5d797f189be81a2bc42a788350 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 12:58:17 +0000 Subject: [PATCH 22/52] rm comment --- crates/trie/parallel/src/proof_task.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index e772aad4aad..b3ab50c305a 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -278,7 +278,6 @@ fn storage_worker_loop( } } - // Channel closed - graceful shutdown tracing::debug!( target: "trie::proof_task", worker_id, From 6472cfe80baf0f4302125e249883d7128513c7aa Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 13:02:20 +0000 Subject: [PATCH 23/52] simplify worker concurrency --- crates/trie/parallel/src/proof_task.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index b3ab50c305a..06b345af156 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -307,21 +307,8 @@ where ) -> ProviderResult { let (tx_sender, proof_task_rx) = channel(); - // Calculate how many workers to spawn. - // We use the full max_concurrency for workers if needed, since on-demand operations - // (BlindedAccountNode) are less frequent and can share the budget. let planned_workers = storage_worker_count.min(max_concurrency); - if planned_workers < storage_worker_count { - tracing::debug!( - target: "trie::proof_task", - requested = storage_worker_count, - capped = planned_workers, - max_concurrency, - "Adjusted storage worker count to fit concurrency budget" - ); - } - // Use unbounded channel to ensure all storage operations are queued to workers. // This maintains transaction reuse benefits and avoids fallback to on-demand execution. 
let (storage_work_tx, storage_work_rx) = unbounded::(); From 61ecc9a36640ba5ac3f7c198c6c5840f20b2c882 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 13:04:20 +0000 Subject: [PATCH 24/52] bump to error! --- crates/engine/tree/src/tree/payload_processor/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index a2d445abb4b..7803a01b334 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -213,7 +213,7 @@ where // If we cannot bootstrap the proof task manager, continue with cache prewarming // only; the caller will detect the missing state root channel and fall back to the // parallel state root algorithm. - tracing::warn!( + tracing::error!( target: "engine::tree", ?error, max_concurrency = max_proof_task_concurrency, From 30f6fda3c3da79b355f3c22d8b4a1e73da6963aa Mon Sep 17 00:00:00 2001 From: YK Date: Tue, 7 Oct 2025 21:29:30 +0800 Subject: [PATCH 25/52] Update crates/engine/tree/src/tree/payload_processor/mod.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/engine/tree/src/tree/payload_processor/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 7803a01b334..a2e75bac107 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -199,8 +199,7 @@ where // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and // blinded nodes) let storage_worker_count = (max_proof_task_concurrency / 2) - .max(1) - .min(max_proof_task_concurrency.saturating_sub(1)); + .max(1); let proof_task = match ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), From 
46803362c1802849150f581fa600b530793a4f99 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 14:39:39 +0000 Subject: [PATCH 26/52] handle sending error back --- crates/trie/parallel/src/proof_task.rs | 53 ++++++++++++++------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 06b345af156..c1638ac1a91 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -73,6 +73,32 @@ enum StorageWorkerJob { }, } +impl StorageWorkerJob { + /// Sends an error back to the caller when worker pool is unavailable. + /// + /// Returns `Ok(())` if the error was sent successfully, or `Err(())` if the receiver was + /// dropped. + fn send_worker_unavailable_error(&self) -> Result<(), ()> { + match self { + Self::StorageProof { result_sender, .. } => { + let error = ParallelStateRootError::Other( + "Storage proof worker pool unavailable".to_string(), + ); + result_sender.send(Err(error)).map_err(|_| ()) + } + Self::BlindedStorageNode { result_sender, .. } => { + let error = SparseTrieError::from(SparseTrieErrorKind::Other(Box::new( + std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "Storage worker pool unavailable", + ), + ))); + result_sender.send(Err(error)).map_err(|_| ()) + } + } + } +} + /// Manager for coordinating proof request execution across different task types. /// /// # Architecture @@ -478,18 +504,7 @@ where ); // Send error back to caller - if let StorageWorkerJob::StorageProof { - result_sender, - .. - } = job - { - let _ = result_sender.send(Err( - ParallelStateRootError::Other( - "Storage proof worker pool unavailable" - .to_string(), - ), - )); - } + let _ = job.send_worker_unavailable_error(); } } } @@ -525,19 +540,7 @@ where ); // Send error back to caller - if let StorageWorkerJob::BlindedStorageNode { - result_sender, - .. 
- } = job - { - let _ = result_sender.send(Err(SparseTrieError::from( - Box::new(std::io::Error::new( - std::io::ErrorKind::BrokenPipe, - "Storage worker pool unavailable", - )) - as Box, - ))); - } + let _ = job.send_worker_unavailable_error(); } } } From 58d6f8b276e6fc83f04ee1cd70e4777d7f454fea Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 14:42:48 +0000 Subject: [PATCH 27/52] fmt --- crates/engine/tree/src/tree/payload_processor/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index a2e75bac107..049419a00e5 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -198,8 +198,7 @@ where let max_proof_task_concurrency = config.max_proof_task_concurrency() as usize; // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and // blinded nodes) - let storage_worker_count = (max_proof_task_concurrency / 2) - .max(1); + let storage_worker_count = (max_proof_task_concurrency / 2).max(1); let proof_task = match ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), From 59b0353f77a6bb5c558dfcb49ccdc0f4b1977dce Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 14:49:35 +0000 Subject: [PATCH 28/52] fix fmt --- crates/trie/parallel/src/proof_task.rs | 55 ++++++++------------------ 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index c1638ac1a91..aaeb37f2d96 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -12,7 +12,7 @@ use crate::root::ParallelStateRootError; use alloy_primitives::{map::B256Set, B256}; use crossbeam_channel::{unbounded, Receiver as CrossbeamReceiver, Sender as CrossbeamSender}; use 
reth_db_api::transaction::DbTx; -use reth_execution_errors::SparseTrieError; +use reth_execution_errors::{SparseTrieError, SparseTrieErrorKind}; use reth_provider::{ providers::ConsistentDbView, BlockReader, DBProvider, DatabaseProviderFactory, FactoryTx, ProviderResult, @@ -319,9 +319,10 @@ where { /// Creates a new [`ProofTaskManager`] with pre-spawned storage proof workers. /// - /// The `max_concurrency` budget is split between pre-spawned storage workers and an - /// on-demand pool. At least one slot is always reserved for on-demand, so the actual - /// number of workers spawned is `min(storage_worker_count, max_concurrency - 1)`. + /// The `storage_worker_count` determines how many storage workers to spawn, and + /// `max_concurrency` determines the limit for on-demand operations (blinded nodes). + /// These are now independent - storage workers are spawned as requested, and on-demand + /// operations use a separate concurrency pool. /// Returns an error if the underlying provider fails to create the transactions required for /// spawning workers. pub fn new( @@ -333,21 +334,19 @@ where ) -> ProviderResult { let (tx_sender, proof_task_rx) = channel(); - let planned_workers = storage_worker_count.min(max_concurrency); - // Use unbounded channel to ensure all storage operations are queued to workers. // This maintains transaction reuse benefits and avoids fallback to on-demand execution. 
let (storage_work_tx, storage_work_rx) = unbounded::(); tracing::info!( target: "trie::proof_task", - storage_worker_count = planned_workers, + storage_worker_count, max_concurrency, "Initializing storage worker pool with unbounded queue" ); let mut spawned_workers = 0; - for worker_id in 0..planned_workers { + for worker_id in 0..storage_worker_count { let provider_ro = match view.provider_ro() { Ok(provider_ro) => provider_ro, Err(err) => { @@ -355,7 +354,7 @@ where target: "trie::proof_task", worker_id, ?err, - requested = planned_workers, + requested = storage_worker_count, spawned_workers, "Failed to create transaction for storage worker, falling back to on-demand execution" ); @@ -379,8 +378,8 @@ where ); } - // Allocate remaining capacity to on-demand pool for account trie operations. - let remaining_concurrency = max_concurrency.saturating_sub(spawned_workers); + // max_concurrency is now used solely for on-demand pool (account trie operations). + let remaining_concurrency = max_concurrency; Ok(Self { storage_work_tx, @@ -955,31 +954,9 @@ mod tests { ) } - /// Ensures the storage workers are capped by `max_concurrency`. - #[test] - fn proof_task_manager_within_concurrency_limit() { - let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); - runtime.block_on(async { - let handle = tokio::runtime::Handle::current(); - let factory = create_test_provider_factory(); - let view = ConsistentDbView::new(factory, None); - let ctx = test_ctx(); - - let manager = ProofTaskManager::new(handle.clone(), view, ctx, 2, 2).unwrap(); - // With max_concurrency=2 and storage_worker_count=2, we get 2 workers - assert_eq!(manager.storage_worker_count, 2); - // No remaining concurrency for on-demand - assert_eq!(manager.max_concurrency, 0); - - drop(manager); - task::yield_now().await; - }); - } - - /// Ensures the manager caps storage workers to `max_concurrency` when requested count is - /// higher. 
+ /// Ensures max_concurrency is independent of storage workers. #[test] - fn proof_task_manager_handles_single_concurrency() { + fn proof_task_manager_independent_pools() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); runtime.block_on(async { let handle = tokio::runtime::Handle::current(); @@ -988,10 +965,10 @@ mod tests { let ctx = test_ctx(); let manager = ProofTaskManager::new(handle.clone(), view, ctx, 1, 5).unwrap(); - // With max_concurrency=1 and storage_worker_count=5, we get 1 worker - assert_eq!(manager.storage_worker_count, 1); - // No remaining concurrency for on-demand - assert_eq!(manager.max_concurrency, 0); + // With storage_worker_count=5, we get exactly 5 workers + assert_eq!(manager.storage_worker_count, 5); + // max_concurrency=1 is for on-demand operations only + assert_eq!(manager.max_concurrency, 1); drop(manager); task::yield_now().await; From 93c67e8b837ab653495d248e391c8476c564e882 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 14:52:21 +0000 Subject: [PATCH 29/52] Enhance TreeConfig with storage worker count configuration - Added a function to determine the default number of storage worker threads based on available parallelism. - Updated TreeConfig to include a storage_worker_count field, initialized with the default value. - Modified payload processor to utilize the new storage_worker_count instead of a hardcoded value. --- crates/engine/primitives/src/config.rs | 27 ++++++++++++++++++- .../tree/src/tree/payload_processor/mod.rs | 4 +-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index f622f32c893..05d11ec830b 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -6,9 +6,18 @@ pub const DEFAULT_PERSISTENCE_THRESHOLD: u64 = 2; /// How close to the canonical head we persist blocks. 
pub const DEFAULT_MEMORY_BLOCK_BUFFER_TARGET: u64 = 0; -/// Default maximum concurrency for proof tasks +/// Default maximum concurrency for on-demand proof tasks (blinded nodes) pub const DEFAULT_MAX_PROOF_TASK_CONCURRENCY: u64 = 256; +/// Returns the default number of storage worker threads based on available parallelism. +/// Defaults to half of available parallelism, clamped between 2 and 8. +fn default_storage_worker_count() -> usize { + #[cfg(feature = "std")] + { + std::thread::available_parallelism().map(|n| (n.get() / 2).clamp(2, 8)).unwrap_or(4) + } +} + /// The size of proof targets chunk to spawn in one multiproof calculation. pub const DEFAULT_MULTIPROOF_TASK_CHUNK_SIZE: usize = 10; @@ -109,6 +118,8 @@ pub struct TreeConfig { prewarm_max_concurrency: usize, /// Whether to unwind canonical header to ancestor during forkchoice updates. allow_unwind_canonical_header: bool, + /// Number of storage proof worker threads. + storage_worker_count: usize, } impl Default for TreeConfig { @@ -135,6 +146,7 @@ impl Default for TreeConfig { always_process_payload_attributes_on_canonical_head: false, prewarm_max_concurrency: DEFAULT_PREWARM_MAX_CONCURRENCY, allow_unwind_canonical_header: false, + storage_worker_count: default_storage_worker_count(), } } } @@ -164,6 +176,7 @@ impl TreeConfig { always_process_payload_attributes_on_canonical_head: bool, prewarm_max_concurrency: usize, allow_unwind_canonical_header: bool, + storage_worker_count: usize, ) -> Self { assert!(max_proof_task_concurrency > 0, "max_proof_task_concurrency must be at least 1"); Self { @@ -188,6 +201,7 @@ impl TreeConfig { always_process_payload_attributes_on_canonical_head, prewarm_max_concurrency, allow_unwind_canonical_header, + storage_worker_count, } } @@ -454,4 +468,15 @@ impl TreeConfig { pub const fn prewarm_max_concurrency(&self) -> usize { self.prewarm_max_concurrency } + + /// Return the number of storage proof worker threads. 
+ pub const fn storage_worker_count(&self) -> usize { + self.storage_worker_count + } + + /// Setter for the number of storage proof worker threads. + pub const fn with_storage_worker_count(mut self, storage_worker_count: usize) -> Self { + self.storage_worker_count = storage_worker_count; + self + } } diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 049419a00e5..1ea648a20c9 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -196,9 +196,7 @@ where state_root_config.prefix_sets.clone(), ); let max_proof_task_concurrency = config.max_proof_task_concurrency() as usize; - // Default to half of max concurrency, leaving room for on-demand tasks (Accountproof and - // blinded nodes) - let storage_worker_count = (max_proof_task_concurrency / 2).max(1); + let storage_worker_count = config.storage_worker_count(); let proof_task = match ProofTaskManager::new( self.executor.handle().clone(), state_root_config.consistent_view.clone(), From 1954502fddf06d4efc7adb149f25b592bdbfca06 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Tue, 7 Oct 2025 14:53:21 +0000 Subject: [PATCH 30/52] update message --- crates/trie/parallel/src/proof_task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index aaeb37f2d96..da1fb335871 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -356,7 +356,7 @@ where ?err, requested = storage_worker_count, spawned_workers, - "Failed to create transaction for storage worker, falling back to on-demand execution" + "Failed to create transaction for storage worker" ); return Err(err); } From 2429320e294064e226d57d90606e1d7bcf23c5a5 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 01:08:33 +0000 Subject: [PATCH 31/52] refactor(proof_task): use impl bound 
- Updated function signatures to accept cursor factories as traits instead of specific types, enhancing flexibility. - Simplified the instantiation of storage proofs by removing unnecessary cloning of cursor factories. --- crates/trie/parallel/src/proof_task.rs | 35 ++++++++++---------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index da1fb335871..724bf2733c2 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -18,10 +18,10 @@ use reth_provider::{ ProviderResult, }; use reth_trie::{ - hashed_cursor::HashedPostStateCursorFactory, + hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory}, prefix_set::TriePrefixSetsMut, proof::{ProofTrieNodeProviderFactory, StorageProof}, - trie_cursor::InMemoryTrieCursorFactory, + trie_cursor::{InMemoryTrieCursorFactory, TrieCursorFactory}, updates::TrieUpdatesSorted, DecodedStorageMultiProof, HashedPostStateSorted, Nibbles, }; @@ -237,8 +237,8 @@ fn storage_worker_loop( let proof_start = Instant::now(); let result = proof_tx.compute_storage_proof( input, - &trie_cursor_factory, - &hashed_cursor_factory, + trie_cursor_factory.clone(), + hashed_cursor_factory.clone(), ); let proof_elapsed = proof_start.elapsed(); @@ -642,14 +642,8 @@ where fn compute_storage_proof( &self, input: StorageProofInput, - trie_cursor_factory: &InMemoryTrieCursorFactory< - DatabaseTrieCursorFactory<&Tx>, - &TrieUpdatesSorted, - >, - hashed_cursor_factory: &HashedPostStateCursorFactory< - DatabaseHashedCursorFactory<&Tx>, - &HashedPostStateSorted, - >, + trie_cursor_factory: impl TrieCursorFactory, + hashed_cursor_factory: impl HashedCursorFactory, ) -> StorageProofResult { // Consume the input so we can move large collections (e.g. target slots) without cloning. 
let StorageProofInput { @@ -676,16 +670,13 @@ where let proof_start = Instant::now(); // Compute raw storage multiproof - let raw_proof_result = StorageProof::new_hashed( - trie_cursor_factory.clone(), - hashed_cursor_factory.clone(), - hashed_address, - ) - .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) - .with_branch_node_masks(with_branch_node_masks) - .with_added_removed_keys(added_removed_keys) - .storage_multiproof(target_slots) - .map_err(|e| ParallelStateRootError::Other(e.to_string())); + let raw_proof_result = + StorageProof::new_hashed(trie_cursor_factory, hashed_cursor_factory, hashed_address) + .with_prefix_set_mut(PrefixSetMut::from(prefix_set.iter().copied())) + .with_branch_node_masks(with_branch_node_masks) + .with_added_removed_keys(added_removed_keys) + .storage_multiproof(target_slots) + .map_err(|e| ParallelStateRootError::Other(e.to_string())); // Decode proof into DecodedStorageMultiProof let decoded_result = raw_proof_result.and_then(|raw_proof| { From 1902b433304d98a6d9cd1aeae2e2d8f5add84546 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 01:30:12 +0000 Subject: [PATCH 32/52] make spawning falliable - addressed alexey comment --- crates/engine/tree/benches/state_root_task.rs | 26 ++++++---- .../tree/src/tree/payload_processor/mod.rs | 47 +++++++++++-------- .../engine/tree/src/tree/payload_validator.rs | 42 ++++++++++------- crates/trie/parallel/src/proof_task.rs | 2 +- 4 files changed, 70 insertions(+), 47 deletions(-) diff --git a/crates/engine/tree/benches/state_root_task.rs b/crates/engine/tree/benches/state_root_task.rs index 9f61e62d2f9..70d9e037e9d 100644 --- a/crates/engine/tree/benches/state_root_task.rs +++ b/crates/engine/tree/benches/state_root_task.rs @@ -228,16 +228,22 @@ fn bench_state_root(c: &mut Criterion) { }, |(genesis_hash, mut payload_processor, provider, state_updates)| { black_box({ - let mut handle = payload_processor.spawn( - Default::default(), - core::iter::empty::< - Result, 
core::convert::Infallible>, - >(), - StateProviderBuilder::new(provider.clone(), genesis_hash, None), - ConsistentDbView::new_with_latest_tip(provider).unwrap(), - TrieInput::default(), - &TreeConfig::default(), - ); + let mut handle = payload_processor + .spawn( + Default::default(), + core::iter::empty::< + Result< + Recovered, + core::convert::Infallible, + >, + >(), + StateProviderBuilder::new(provider.clone(), genesis_hash, None), + ConsistentDbView::new_with_latest_tip(provider).unwrap(), + TrieInput::default(), + &TreeConfig::default(), + ) + .map_err(|(err, ..)| err) + .expect("failed to spawn payload processor"); let mut state_hook = handle.state_hook(); diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 1ea648a20c9..3febb5498fb 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -166,6 +166,10 @@ where /// /// This returns a handle to await the final state root and to interact with the tasks (e.g. /// canceling) + /// + /// Returns an error with the original transactions iterator if the proof task manager fails to + /// initialize. + #[allow(clippy::type_complexity)] pub fn spawn>( &mut self, env: ExecutionEnv, @@ -174,7 +178,10 @@ where consistent_view: ConsistentDbView

, trie_input: TrieInput, config: &TreeConfig, - ) -> PayloadHandle, I::Tx>, I::Error> + ) -> Result< + PayloadHandle, I::Tx>, I::Error>, + (reth_provider::ProviderError, I, ExecutionEnv, StateProviderBuilder), + > where P: DatabaseProviderFactory + BlockReader @@ -204,19 +211,15 @@ where max_proof_task_concurrency, storage_worker_count, ) { - Ok(proof_task) => proof_task, + Ok(task) => task, Err(error) => { - // If we cannot bootstrap the proof task manager, continue with cache prewarming - // only; the caller will detect the missing state root channel and fall back to the - // parallel state root algorithm. + // Fall back to parallel state root if proof task manager fails to initialize tracing::error!( target: "engine::tree", ?error, - max_concurrency = max_proof_task_concurrency, - requested_workers = storage_worker_count, - "Failed to initialize proof task manager, falling back to cache-only mode" + "Failed to initialize proof task manager, falling back to parallel state root" ); - return self.spawn_cache_exclusive(env, transactions, provider_builder); + return Err((error, transactions, env, provider_builder)); } }; @@ -269,12 +272,12 @@ where } }); - PayloadHandle { + Ok(PayloadHandle { to_multi_proof, prewarm_handle, state_root: Some(state_root_rx), transactions: execution_rx, - } + }) } /// Spawns a task that exclusively handles cache prewarming for transaction execution. 
@@ -879,14 +882,20 @@ mod tests { PrecompileCacheMap::default(), ); let provider = BlockchainProvider::new(factory).unwrap(); - let mut handle = payload_processor.spawn( - Default::default(), - core::iter::empty::, core::convert::Infallible>>(), - StateProviderBuilder::new(provider.clone(), genesis_hash, None), - ConsistentDbView::new_with_latest_tip(provider).unwrap(), - TrieInput::from_state(hashed_state), - &TreeConfig::default(), - ); + let mut handle = + payload_processor + .spawn( + Default::default(), + core::iter::empty::< + Result, core::convert::Infallible>, + >(), + StateProviderBuilder::new(provider.clone(), genesis_hash, None), + ConsistentDbView::new_with_latest_tip(provider).unwrap(), + TrieInput::from_state(hashed_state), + &TreeConfig::default(), + ) + .map_err(|(err, ..)| err) + .expect("failed to spawn payload processor"); let mut state_hook = handle.state_hook(); diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index a8886b72a24..1e63d29bf79 100644 --- a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -877,29 +877,37 @@ where // too expensive because it requires walking all paths in every proof. let spawn_start = Instant::now(); let (handle, strategy) = if trie_input.prefix_sets.is_empty() { - let handle = self.payload_processor.spawn( + match self.payload_processor.spawn( env, txs, provider_builder, consistent_view, trie_input, &self.config, - ); - // The payload processor will silently downgrade to cache-only mode if the proof - // task manager fails to initialize (e.g. provider error). Detect that here and - // fall back to the legacy parallel state root computation so we still attempt - // to reuse the caching pipeline. 
- let strategy = if handle.supports_state_root() { - StateRootStrategy::StateRootTask - } else { - debug!( - target: "engine::tree", - block=?block_num_hash, - "Proof task initialization failed, falling back to parallel state root" - ); - StateRootStrategy::Parallel - }; - (handle, strategy) + ) { + Ok(handle) => { + // Successfully spawned with state root task support + (handle, StateRootStrategy::StateRootTask) + } + Err((error, txs, env, provider_builder)) => { + // Failed to initialize proof task manager, fallback to parallel state + // root + error!( + target: "engine::tree", + block=?block_num_hash, + ?error, + "Failed to initialize proof task manager, falling back to parallel state root" + ); + ( + self.payload_processor.spawn_cache_exclusive( + env, + txs, + provider_builder, + ), + StateRootStrategy::Parallel, + ) + } + } // if prefix sets are not empty, we spawn a task that exclusively handles cache // prewarming for transaction execution } else { diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 724bf2733c2..07bc7543e4b 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -945,7 +945,7 @@ mod tests { ) } - /// Ensures max_concurrency is independent of storage workers. + /// Ensures `max_concurrency` is independent of storage workers. 
#[test] fn proof_task_manager_independent_pools() { let runtime = Builder::new_multi_thread().worker_threads(1).enable_all().build().unwrap(); From 8fb0dd12a42cd5932789f414f29435cdec7d7e66 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 01:31:33 +0000 Subject: [PATCH 33/52] remove error log, as we propagate up --- crates/trie/parallel/src/proof_task.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 07bc7543e4b..3a21b03a434 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -350,14 +350,6 @@ where let provider_ro = match view.provider_ro() { Ok(provider_ro) => provider_ro, Err(err) => { - tracing::error!( - target: "trie::proof_task", - worker_id, - ?err, - requested = storage_worker_count, - spawned_workers, - "Failed to create transaction for storage worker" - ); return Err(err); } }; From e0010d7d5e305a2f627338627f76ead23ca8773c Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 09:40:55 +0800 Subject: [PATCH 34/52] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/engine/primitives/src/config.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index 05d11ec830b..b719c5e10cd 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -16,6 +16,10 @@ fn default_storage_worker_count() -> usize { { std::thread::available_parallelism().map(|n| (n.get() / 2).clamp(2, 8)).unwrap_or(4) } + #[cfg(not(feature = "std"))] + { + 4 + } } /// The size of proof targets chunk to spawn in one multiproof calculation. 
From 53cd4bae9120745f4cf8fb87d249ba9f864f4baa Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 01:44:37 +0000 Subject: [PATCH 35/52] use expect instead of unwrap --- crates/engine/tree/src/tree/payload_processor/multiproof.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/engine/tree/src/tree/payload_processor/multiproof.rs b/crates/engine/tree/src/tree/payload_processor/multiproof.rs index b36fcae4f46..18d394477fb 100644 --- a/crates/engine/tree/src/tree/payload_processor/multiproof.rs +++ b/crates/engine/tree/src/tree/payload_processor/multiproof.rs @@ -1238,7 +1238,7 @@ mod tests { 1, 1, ) - .unwrap(); + .expect("Failed to create ProofTaskManager"); let channel = channel(); MultiProofTask::new(config, executor, proof_task.handle(), channel.0, 1, None) From e854628b1c4af4621dccd3a2ea96cca147db166a Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 11:12:45 +0800 Subject: [PATCH 36/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/trie/parallel/src/proof_task.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 3a21b03a434..286b48d7373 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -347,12 +347,7 @@ where let mut spawned_workers = 0; for worker_id in 0..storage_worker_count { - let provider_ro = match view.provider_ro() { - Ok(provider_ro) => provider_ro, - Err(err) => { - return Err(err); - } - }; + let provider_ro = view.provider_ro()?; let tx = provider_ro.into_tx(); let proof_task_tx = ProofTaskTx::new(tx, task_ctx.clone(), worker_id); From 3b17cc75af3e241e1628484a54161cc9f689dff3 Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 11:12:59 +0800 Subject: [PATCH 37/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Copilot 
<175728472+Copilot@users.noreply.github.com> --- crates/trie/parallel/src/proof_task.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 286b48d7373..5db778f1679 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -320,9 +320,9 @@ where /// Creates a new [`ProofTaskManager`] with pre-spawned storage proof workers. /// /// The `storage_worker_count` determines how many storage workers to spawn, and - /// `max_concurrency` determines the limit for on-demand operations (blinded nodes). + /// `max_concurrency` determines the limit for on-demand operations (blinded account nodes). /// These are now independent - storage workers are spawned as requested, and on-demand - /// operations use a separate concurrency pool. + /// operations use a separate concurrency pool for blinded account nodes. /// Returns an error if the underlying provider fails to create the transactions required for /// spawning workers. 
pub fn new( From 6c89cf4dc954fce44091ba7ecc22bead45bbb392 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 03:11:51 +0000 Subject: [PATCH 38/52] consolidate --- crates/trie/parallel/src/proof_task.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 5db778f1679..f06c20f3205 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -440,11 +440,8 @@ where proof_task_tx.blinded_account_node(path, sender, tx_sender); } // Storage trie operations should never reach here as they're routed to worker pool - ProofTaskKind::BlindedStorageNode(_, _, _) => { - unreachable!("BlindedStorageNode should be routed to worker pool") - } - ProofTaskKind::StorageProof(_, _) => { - unreachable!("StorageProof should be routed to worker pool") + ProofTaskKind::BlindedStorageNode(_, _, _) | ProofTaskKind::StorageProof(_, _) => { + unreachable!("Storage trie operations should be routed to worker pool") } }); From c5f6eb9a58675f24bc937aac8e2c648fa209c0e2 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 03:15:51 +0000 Subject: [PATCH 39/52] removed the unnecessary remaining_concurrency variable allocation --- crates/trie/parallel/src/proof_task.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index f06c20f3205..24b5dd4b13e 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -365,16 +365,14 @@ where ); } - // max_concurrency is now used solely for on-demand pool (account trie operations). 
- let remaining_concurrency = max_concurrency; - Ok(Self { storage_work_tx, storage_worker_count: spawned_workers, - max_concurrency: remaining_concurrency, + max_concurrency, total_transactions: 0, pending_tasks: VecDeque::new(), - proof_task_txs: Vec::with_capacity(remaining_concurrency), + proof_task_txs: Vec::with_capacity(max_concurrency), /* used for on-demand account + * trie operations */ view, task_ctx, executor, From af73c7a35e86f343df8b1ffb242b5f5eecc3f772 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 03:26:19 +0000 Subject: [PATCH 40/52] clippy --- crates/storage/provider/src/providers/blockchain_provider.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/storage/provider/src/providers/blockchain_provider.rs b/crates/storage/provider/src/providers/blockchain_provider.rs index 890b98124a5..d729d1bfa0a 100644 --- a/crates/storage/provider/src/providers/blockchain_provider.rs +++ b/crates/storage/provider/src/providers/blockchain_provider.rs @@ -2272,7 +2272,7 @@ mod tests { // Invalid/Non-existent argument should return `None` { - call_method!($arg_count, provider, $method, |_,_,_,_| ( ($invalid_args, None)), tx_num, tx_hash, &in_memory_blocks[0], &receipts); + call_method!($arg_count, provider, $method, |_,_,_,_| ($invalid_args, None), tx_num, tx_hash, &in_memory_blocks[0], &receipts); } // Check that the item is only in memory and not in database @@ -2283,7 +2283,7 @@ mod tests { call_method!($arg_count, provider, $method, |_,_,_,_| (args.clone(), expected_item), tx_num, tx_hash, last_mem_block, &receipts); // Ensure the item is not in storage - call_method!($arg_count, provider.database, $method, |_,_,_,_| ( (args, None)), tx_num, tx_hash, last_mem_block, &receipts); + call_method!($arg_count, provider.database, $method, |_,_,_,_| (args, None), tx_num, tx_hash, last_mem_block, &receipts); } )* }}; From a8e52bcbc0bc9d0c9c287accbc5ec11327cd29d3 Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 16:03:30 
+0800 Subject: [PATCH 41/52] Apply suggestion from @yongkangc --- crates/trie/parallel/src/proof_task.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 24b5dd4b13e..00b252f2fe1 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -478,7 +478,7 @@ where ); } Err(crossbeam_channel::SendError(job)) => { - tracing::warn!( + tracing::error!( target: "trie::proof_task", storage_worker_count = self.storage_worker_count, "Worker pool disconnected, cannot process storage proof" From c48b3285db5c55dcee09becb97e6905689a7a8dd Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 10:30:26 +0000 Subject: [PATCH 42/52] address brian's pr --- crates/engine/tree/src/tree/payload_processor/mod.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs index 3febb5498fb..d449031606e 100644 --- a/crates/engine/tree/src/tree/payload_processor/mod.rs +++ b/crates/engine/tree/src/tree/payload_processor/mod.rs @@ -213,12 +213,6 @@ where ) { Ok(task) => task, Err(error) => { - // Fall back to parallel state root if proof task manager fails to initialize - tracing::error!( - target: "engine::tree", - ?error, - "Failed to initialize proof task manager, falling back to parallel state root" - ); return Err((error, transactions, env, provider_builder)); } }; @@ -486,11 +480,6 @@ impl PayloadHandle { .map_err(|_| ParallelStateRootError::Other("sparse trie task dropped".to_string()))? } - /// Returns `true` if the handle is connected to a background state root task. - pub const fn supports_state_root(&self) -> bool { - self.state_root.is_some() - } - /// Returns a state hook to be used to send state updates to this task. /// /// If a multiproof task is spawned the hook will notify it about new states. 
From 7efff3d4e5215c0553ab09636b044bbec8982fea Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 21:14:37 +0800 Subject: [PATCH 43/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Alexey Shekhirin <5773434+shekhirin@users.noreply.github.com> --- crates/trie/parallel/src/proof_task.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 00b252f2fe1..2212d7652c7 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -371,8 +371,7 @@ where max_concurrency, total_transactions: 0, pending_tasks: VecDeque::new(), - proof_task_txs: Vec::with_capacity(max_concurrency), /* used for on-demand account - * trie operations */ + proof_task_txs: Vec::with_capacity(max_concurrency), view, task_ctx, executor, From f7cd93fb85586038260099ea500be32a66b94629 Mon Sep 17 00:00:00 2001 From: YK Date: Wed, 8 Oct 2025 21:14:58 +0800 Subject: [PATCH 44/52] Update crates/trie/parallel/src/proof_task.rs Co-authored-by: Alexey Shekhirin <5773434+shekhirin@users.noreply.github.com> --- crates/trie/parallel/src/proof_task.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 2212d7652c7..685ec1c085c 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -450,8 +450,7 @@ where /// # Task Routing /// /// - **Storage Trie Operations** (`StorageProof` and `BlindedStorageNode`): Routed to - /// pre-spawned worker pool via unbounded channel. Only falls back to `pending_tasks` if - /// workers are disconnected (e.g., all workers panicked). + /// pre-spawned worker pool via unbounded channel. /// - **Account Trie Operations** (`BlindedAccountNode`): Queued for on-demand execution via /// `pending_tasks`. 
/// From f823c6bcd94211ab85ab709786ec0ceaf00f8150 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 13:17:40 +0000 Subject: [PATCH 45/52] Refactor error handling in StorageWorkerJob to use a consistent error message format --- crates/trie/parallel/src/proof_task.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 685ec1c085c..866f0f6cb98 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -79,23 +79,19 @@ impl StorageWorkerJob { /// Returns `Ok(())` if the error was sent successfully, or `Err(())` if the receiver was /// dropped. fn send_worker_unavailable_error(&self) -> Result<(), ()> { + let error_msg = "Storage proof worker pool unavailable"; + match self { Self::StorageProof { result_sender, .. } => { - let error = ParallelStateRootError::Other( - "Storage proof worker pool unavailable".to_string(), - ); - result_sender.send(Err(error)).map_err(|_| ()) + result_sender.send(Err(ParallelStateRootError::Other(error_msg.to_string()))) } Self::BlindedStorageNode { result_sender, .. 
} => { - let error = SparseTrieError::from(SparseTrieErrorKind::Other(Box::new( - std::io::Error::new( - std::io::ErrorKind::BrokenPipe, - "Storage worker pool unavailable", - ), - ))); - result_sender.send(Err(error)).map_err(|_| ()) + result_sender.send(Err(SparseTrieError::from(SparseTrieErrorKind::Other( + Box::new(std::io::Error::new(std::io::ErrorKind::BrokenPipe, error_msg)) + )))) } } + .map_err(|_| ()) } } From 9c08aedb4c20bcde723ec47afb7f30738f3b4c7f Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 13:28:21 +0000 Subject: [PATCH 46/52] Refactor error handling in StorageWorkerJob to use structured error types --- crates/trie/parallel/src/proof_task.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index 866f0f6cb98..ce845e2407e 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -79,15 +79,14 @@ impl StorageWorkerJob { /// Returns `Ok(())` if the error was sent successfully, or `Err(())` if the receiver was /// dropped. fn send_worker_unavailable_error(&self) -> Result<(), ()> { - let error_msg = "Storage proof worker pool unavailable"; + let error = + ParallelStateRootError::Other("Storage proof worker pool unavailable".to_string()); match self { - Self::StorageProof { result_sender, .. } => { - result_sender.send(Err(ParallelStateRootError::Other(error_msg.to_string()))) - } + Self::StorageProof { result_sender, .. } => result_sender.send(Err(error)), Self::BlindedStorageNode { result_sender, .. 
} => { result_sender.send(Err(SparseTrieError::from(SparseTrieErrorKind::Other( - Box::new(std::io::Error::new(std::io::ErrorKind::BrokenPipe, error_msg)) + Box::new(error), )))) } } From 7f9ec065b2c2c3490734ecfcc5a7c3795ac08dc1 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 13:28:30 +0000 Subject: [PATCH 47/52] fmt, clippy --- crates/trie/parallel/src/proof_task.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index ce845e2407e..fa90e9dad39 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -84,11 +84,8 @@ where match self { Self::StorageProof { result_sender, .. } => result_sender.send(Err(error)), - Self::BlindedStorageNode { result_sender, .. } => { - result_sender.send(Err(SparseTrieError::from(SparseTrieErrorKind::Other( - Box::new(error), - )))) - } + Self::BlindedStorageNode { result_sender, .. } => result_sender + .send(Err(SparseTrieError::from(SparseTrieErrorKind::Other(Box::new(error))))), } .map_err(|_| ()) } From 6d413528a0384b2b9217bfacad9dbb99f92544dd Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Wed, 8 Oct 2025 13:30:03 +0000 Subject: [PATCH 48/52] fix --- crates/trie/parallel/src/proof_task.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/trie/parallel/src/proof_task.rs b/crates/trie/parallel/src/proof_task.rs index fa90e9dad39..0c513c55763 100644 --- a/crates/trie/parallel/src/proof_task.rs +++ b/crates/trie/parallel/src/proof_task.rs @@ -83,11 +83,13 @@ where ParallelStateRootError::Other("Storage proof worker pool unavailable".to_string()); match self { - Self::StorageProof { result_sender, .. } => result_sender.send(Err(error)), + Self::StorageProof { result_sender, .. } => { + result_sender.send(Err(error)).map_err(|_| ()) + } Self::BlindedStorageNode { result_sender, .. 
} => result_sender - .send(Err(SparseTrieError::from(SparseTrieErrorKind::Other(Box::new(error))))), + .send(Err(SparseTrieError::from(SparseTrieErrorKind::Other(Box::new(error))))) + .map_err(|_| ()), } - .map_err(|_| ()) } } From 4ca404e51733af40f978a165a682744c2fe7db6a Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Thu, 9 Oct 2025 14:35:13 +0000 Subject: [PATCH 49/52] bump up workers --- crates/engine/primitives/src/config.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs index b719c5e10cd..b2f8da4d424 100644 --- a/crates/engine/primitives/src/config.rs +++ b/crates/engine/primitives/src/config.rs @@ -10,15 +10,14 @@ pub const DEFAULT_MEMORY_BLOCK_BUFFER_TARGET: u64 = 0; pub const DEFAULT_MAX_PROOF_TASK_CONCURRENCY: u64 = 256; /// Returns the default number of storage worker threads based on available parallelism. -/// Defaults to half of available parallelism, clamped between 2 and 8. fn default_storage_worker_count() -> usize { #[cfg(feature = "std")] { - std::thread::available_parallelism().map(|n| (n.get() / 2).clamp(2, 8)).unwrap_or(4) + std::thread::available_parallelism().map(|n| (n.get() * 2).clamp(2, 64)).unwrap_or(8) } #[cfg(not(feature = "std"))] { - 4 + 8 } } From d66ed613973f2a46f82ddf88fa61bcb7cec61bf3 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Thu, 9 Oct 2025 14:57:53 +0000 Subject: [PATCH 50/52] cli flag for storage --- crates/node/core/src/args/engine.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/crates/node/core/src/args/engine.rs b/crates/node/core/src/args/engine.rs index 88179a6b40e..5acf47ae740 100644 --- a/crates/node/core/src/args/engine.rs +++ b/crates/node/core/src/args/engine.rs @@ -108,6 +108,11 @@ pub struct EngineArgs { /// See `TreeConfig::unwind_canonical_header` for more details. 
#[arg(long = "engine.allow-unwind-canonical-header", default_value = "false")] pub allow_unwind_canonical_header: bool, + + /// Configure the number of storage proof worker threads. + /// If not specified, defaults to 2x available parallelism, clamped between 2 and 64. + #[arg(long = "engine.storage-worker-count")] + pub storage_worker_count: Option, } #[allow(deprecated)] @@ -134,6 +139,7 @@ impl Default for EngineArgs { state_root_fallback: false, always_process_payload_attributes_on_canonical_head: false, allow_unwind_canonical_header: false, + storage_worker_count: None, } } } @@ -141,7 +147,7 @@ impl Default for EngineArgs { impl EngineArgs { /// Creates a [`TreeConfig`] from the engine arguments. pub fn tree_config(&self) -> TreeConfig { - TreeConfig::default() + let mut config = TreeConfig::default() .with_persistence_threshold(self.persistence_threshold) .with_memory_block_buffer_target(self.memory_block_buffer_target) .with_legacy_state_root(self.legacy_state_root_task_enabled) @@ -159,7 +165,13 @@ impl EngineArgs { .with_always_process_payload_attributes_on_canonical_head( self.always_process_payload_attributes_on_canonical_head, ) - .with_unwind_canonical_header(self.allow_unwind_canonical_header) + .with_unwind_canonical_header(self.allow_unwind_canonical_header); + + if let Some(count) = self.storage_worker_count { + config = config.with_storage_worker_count(count); + } + + config } } From 4aba3de0764bf71b44eb3b3022d743c723a10e52 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Fri, 10 Oct 2025 00:19:27 +0000 Subject: [PATCH 51/52] docs: update CLI reference for storage-worker-count flag --- docs/vocs/docs/pages/cli/reth/node.mdx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx index 2021b342d62..07ba5e5d84e 100644 --- a/docs/vocs/docs/pages/cli/reth/node.mdx +++ b/docs/vocs/docs/pages/cli/reth/node.mdx @@ -864,6 +864,9 @@ Engine: --engine.allow-unwind-canonical-header 
Allow unwinding canonical header to ancestor during forkchoice updates. See `TreeConfig::unwind_canonical_header` for more details + --engine.storage-worker-count + Configure the number of storage proof worker threads. If not specified, defaults to 2x available parallelism, clamped between 2 and 64 + ERA: --era.enable Enable import from ERA1 files From 8e64738b03bea504fcca0741536f2275f1e41b93 Mon Sep 17 00:00:00 2001 From: Yong Kang Date: Fri, 10 Oct 2025 00:21:52 +0000 Subject: [PATCH 52/52] docs: clarify storage-worker-count uses Tokio blocking pool --- crates/node/core/src/args/engine.rs | 2 +- docs/vocs/docs/pages/cli/reth/node.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/node/core/src/args/engine.rs b/crates/node/core/src/args/engine.rs index 5acf47ae740..2298b28f9ce 100644 --- a/crates/node/core/src/args/engine.rs +++ b/crates/node/core/src/args/engine.rs @@ -109,7 +109,7 @@ pub struct EngineArgs { #[arg(long = "engine.allow-unwind-canonical-header", default_value = "false")] pub allow_unwind_canonical_header: bool, - /// Configure the number of storage proof worker threads. + /// Configure the number of storage proof workers in the Tokio blocking pool. /// If not specified, defaults to 2x available parallelism, clamped between 2 and 64. #[arg(long = "engine.storage-worker-count")] pub storage_worker_count: Option, diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx index 07ba5e5d84e..394854f7246 100644 --- a/docs/vocs/docs/pages/cli/reth/node.mdx +++ b/docs/vocs/docs/pages/cli/reth/node.mdx @@ -865,7 +865,7 @@ Engine: Allow unwinding canonical header to ancestor during forkchoice updates. See `TreeConfig::unwind_canonical_header` for more details --engine.storage-worker-count - Configure the number of storage proof worker threads. 
If not specified, defaults to 2x available parallelism, clamped between 2 and 64 + Configure the number of storage proof workers in the Tokio blocking pool. If not specified, defaults to 2x available parallelism, clamped between 2 and 64 ERA: --era.enable