From 8aa13570e7e19c169c0463ea83e8d3e655bd0c4e Mon Sep 17 00:00:00 2001
From: Ishika Choudhury <117741714+Rimeeeeee@users.noreply.github.com>
Date: Tue, 21 Apr 2026 13:28:00 +0530
Subject: [PATCH 1/4] chore(BAL): add flags to disable BAL parallelization and batch IO

---
 crates/engine/primitives/src/config.rs        | 56 +++++++++++++++++++
 .../tree/src/tree/payload_processor/mod.rs    | 14 ++++-
 .../src/tree/payload_processor/prewarm.rs     |  7 +++
 crates/node/core/src/args/engine.rs           | 47 +++++++++++++++-
 docs/vocs/docs/pages/cli/reth/node.mdx        |  8 +++
 5 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/crates/engine/primitives/src/config.rs b/crates/engine/primitives/src/config.rs
index 3951253f101..89da6419451 100644
--- a/crates/engine/primitives/src/config.rs
+++ b/crates/engine/primitives/src/config.rs
@@ -176,6 +176,17 @@ pub struct TreeConfig {
     /// arrives until the next FCU without attributes. This avoids persistence I/O competing
     /// with block building on latency-sensitive chains.
     suppress_persistence_during_build: bool,
+    /// Whether to disable BAL (Block Access List, EIP-7928) based parallel execution.
+    /// When set, falls back to transaction-based prewarming even when a BAL is available.
+    disable_bal_parallel_execution: bool,
+    /// Whether to disable BAL-driven parallel state root computation.
+    /// When set, the BAL hashed post state is not sent to the multiproof task for
+    /// early parallel state root computation.
+    disable_bal_parallel_state_root: bool,
+    /// Whether to disable BAL (Block Access List) batched IO during prewarming.
+    /// When set, falls back to individual per-slot storage reads instead of
+    /// batched cursor reads via `storage_range`.
+    disable_bal_batch_io: bool,
     /// Maximum random jitter applied before each proof computation (trie-debug only).
     /// When set, each proof worker sleeps for a random duration up to this value
     /// before starting a proof calculation.
@@ -219,6 +230,9 @@ impl Default for TreeConfig {
             share_execution_cache_with_payload_builder: false,
             share_sparse_trie_with_payload_builder: false,
             suppress_persistence_during_build: false,
+            disable_bal_parallel_execution: false,
+            disable_bal_parallel_state_root: false,
+            disable_bal_batch_io: false,
             #[cfg(feature = "trie-debug")]
             proof_jitter: None,
         }
@@ -291,6 +305,9 @@ impl TreeConfig {
             share_execution_cache_with_payload_builder,
             share_sparse_trie_with_payload_builder,
             suppress_persistence_during_build: false,
+            disable_bal_parallel_execution: false,
+            disable_bal_parallel_state_root: false,
+            disable_bal_batch_io: false,
             #[cfg(feature = "trie-debug")]
             proof_jitter: None,
         }
@@ -665,6 +682,45 @@ impl TreeConfig {
         self
     }
 
+    /// Returns whether BAL-based parallel execution is disabled.
+    pub const fn disable_bal_parallel_execution(&self) -> bool {
+        self.disable_bal_parallel_execution
+    }
+
+    /// Setter for whether to disable BAL-based parallel execution.
+    pub const fn without_bal_parallel_execution(
+        mut self,
+        disable_bal_parallel_execution: bool,
+    ) -> Self {
+        self.disable_bal_parallel_execution = disable_bal_parallel_execution;
+        self
+    }
+
+    /// Returns whether BAL-driven parallel state root computation is disabled.
+    pub const fn disable_bal_parallel_state_root(&self) -> bool {
+        self.disable_bal_parallel_state_root
+    }
+
+    /// Setter for whether to disable BAL-driven parallel state root computation.
+    pub const fn without_bal_parallel_state_root(
+        mut self,
+        disable_bal_parallel_state_root: bool,
+    ) -> Self {
+        self.disable_bal_parallel_state_root = disable_bal_parallel_state_root;
+        self
+    }
+
+    /// Returns whether BAL batched IO is disabled.
+    pub const fn disable_bal_batch_io(&self) -> bool {
+        self.disable_bal_batch_io
+    }
+
+    /// Setter for whether to disable BAL batched IO.
+    pub const fn without_bal_batch_io(mut self, disable_bal_batch_io: bool) -> Self {
+        self.disable_bal_batch_io = disable_bal_batch_io;
+        self
+    }
+
     /// Returns the proof jitter duration, if configured (trie-debug only).
     #[cfg(feature = "trie-debug")]
     pub const fn proof_jitter(&self) -> Option<Duration> {
diff --git a/crates/engine/tree/src/tree/payload_processor/mod.rs b/crates/engine/tree/src/tree/payload_processor/mod.rs
index 92ff14890b5..5493583e7dd 100644
--- a/crates/engine/tree/src/tree/payload_processor/mod.rs
+++ b/crates/engine/tree/src/tree/payload_processor/mod.rs
@@ -122,6 +122,12 @@ where
     sparse_trie_max_hot_accounts: usize,
     /// Whether sparse trie cache pruning is fully disabled.
     disable_sparse_trie_cache_pruning: bool,
+    /// Whether to disable BAL-based parallel execution (falls back to tx-based prewarming).
+    disable_bal_parallel_execution: bool,
+    /// Whether to disable BAL-driven parallel state root computation.
+    disable_bal_parallel_state_root: bool,
+    /// Whether BAL batched IO is disabled.
+    disable_bal_batch_io: bool,
 }
 
 impl<N, Evm> PayloadProcessor<Evm>
@@ -157,6 +163,9 @@ where
             disable_sparse_trie_cache_pruning: config.disable_sparse_trie_cache_pruning(),
             cache_metrics: (!config.disable_cache_metrics())
                 .then(|| CachedStateMetrics::zeroed(CachedStateMetricsSource::Engine)),
+            disable_bal_parallel_execution: config.disable_bal_parallel_execution(),
+            disable_bal_parallel_state_root: config.disable_bal_parallel_state_root(),
+            disable_bal_batch_io: config.disable_bal_batch_io(),
         }
     }
 }
@@ -488,6 +497,8 @@ where
             executed_tx_index: Arc::clone(&executed_tx_index),
             precompile_cache_disabled: self.precompile_cache_disabled,
             precompile_cache_map: self.precompile_cache_map.clone(),
+            disable_bal_parallel_state_root: self.disable_bal_parallel_state_root,
+            disable_bal_batch_io: self.disable_bal_batch_io,
         };
 
         let (prewarm_task, to_prewarm_task) = PrewarmCacheTask::new(
@@ -499,10 +510,11 @@ where
 
         {
             let to_prewarm_task = to_prewarm_task.clone();
+            let disable_bal_parallel_execution = self.disable_bal_parallel_execution;
             self.executor.spawn_blocking_named("prewarm", move || {
                 let mode = if skip_prewarm {
                     PrewarmMode::Skipped
-                } else if let Some(bal) = bal {
+                } else if let Some(bal) = bal.filter(|_| !disable_bal_parallel_execution) {
                     PrewarmMode::BlockAccessList(bal)
                 } else {
                     PrewarmMode::Transactions(transactions)
diff --git a/crates/engine/tree/src/tree/payload_processor/prewarm.rs b/crates/engine/tree/src/tree/payload_processor/prewarm.rs
index 1f9a88c2cde..274a0697226 100644
--- a/crates/engine/tree/src/tree/payload_processor/prewarm.rs
+++ b/crates/engine/tree/src/tree/payload_processor/prewarm.rs
@@ -536,6 +536,10 @@ where
     pub precompile_cache_disabled: bool,
     /// The precompile cache map.
     pub precompile_cache_map: PrecompileCacheMap<SpecFor<Evm>>,
+    /// Whether to disable BAL-driven parallel state root computation.
+    pub disable_bal_parallel_state_root: bool,
+    /// Whether BAL batched IO is disabled. TODO(review): this flag is stored but not yet read.
+    pub disable_bal_batch_io: bool,
 }
 
 /// Per-thread EVM state initialised by [`PrewarmContext::evm_for_ctx`] and stored in
@@ -631,6 +635,9 @@ where
         account_changes: &alloy_eip7928::AccountChanges,
         to_sparse_trie_task: &CrossbeamSender<StateRootMessage>,
     ) {
+        if self.disable_bal_parallel_state_root {
+            return;
+        }
         let address = account_changes.address;
         let mut hashed_address = None;
 
diff --git a/crates/node/core/src/args/engine.rs b/crates/node/core/src/args/engine.rs
index 0f79d4d2d59..49921747d3e 100644
--- a/crates/node/core/src/args/engine.rs
+++ b/crates/node/core/src/args/engine.rs
@@ -50,6 +50,8 @@ pub struct DefaultEngineValues {
     share_execution_cache_with_payload_builder: bool,
     share_sparse_trie_with_payload_builder: bool,
     suppress_persistence_during_build: bool,
+    bal_parallel_execution_disabled: bool,
+    bal_parallel_state_root_disabled: bool,
 }
 
 impl DefaultEngineValues {
@@ -233,6 +235,18 @@ impl DefaultEngineValues {
         self.suppress_persistence_during_build = v;
         self
     }
+
+    /// Set whether to disable BAL-based parallel execution by default
+    pub const fn with_bal_parallel_execution_disabled(mut self, v: bool) -> Self {
+        self.bal_parallel_execution_disabled = v;
+        self
+    }
+
+    /// Set whether to disable BAL-driven parallel state root by default
+    pub const fn with_bal_parallel_state_root_disabled(mut self, v: bool) -> Self {
+        self.bal_parallel_state_root_disabled = v;
+        self
+    }
 }
 
 impl Default for DefaultEngineValues {
@@ -266,6 +280,8 @@ impl Default for DefaultEngineValues {
             share_execution_cache_with_payload_builder: false,
             share_sparse_trie_with_payload_builder: false,
             suppress_persistence_during_build: false,
+            bal_parallel_execution_disabled: false,
+            bal_parallel_state_root_disabled: false,
         }
     }
 }
@@ -478,6 +494,21 @@ pub struct EngineArgs {
     )]
     pub suppress_persistence_during_build: bool,
 
+    /// Disable BAL (Block Access List, EIP-7928) based parallel execution. When set, falls back
+    /// to transaction-based prewarming even when a BAL is available.
+    #[arg(long = "engine.disable-bal-parallel-execution", default_value_t = DefaultEngineValues::get_global().bal_parallel_execution_disabled)]
+    pub bal_parallel_execution_disabled: bool,
+
+    /// Disable BAL-driven parallel state root computation. When set, the BAL hashed post state
+    /// is not sent to the multiproof task for early parallel state root computation.
+    #[arg(long = "engine.disable-bal-parallel-state-root", default_value_t = DefaultEngineValues::get_global().bal_parallel_state_root_disabled)]
+    pub bal_parallel_state_root_disabled: bool,
+
+    /// Disable BAL (Block Access List) batched IO during prewarming. When set, falls back
+    /// to individual per-slot storage reads instead of batched cursor reads.
+    #[arg(long = "engine.disable-bal-batch-io", default_value_t = false)]
+    pub disable_bal_batch_io: bool,
+
     /// Add random jitter before each proof computation (trie-debug only).
     /// Each proof worker sleeps for a random duration up to this value before
     /// starting work. Useful for stress-testing timing-sensitive proof logic.
@@ -524,6 +555,8 @@ impl Default for EngineArgs {
             share_execution_cache_with_payload_builder,
             share_sparse_trie_with_payload_builder,
             suppress_persistence_during_build,
+            bal_parallel_execution_disabled,
+            bal_parallel_state_root_disabled,
         } = DefaultEngineValues::get_global().clone();
         Self {
             persistence_threshold,
@@ -560,6 +593,9 @@ impl Default for EngineArgs {
             share_execution_cache_with_payload_builder,
             share_sparse_trie_with_payload_builder,
             suppress_persistence_during_build,
+            bal_parallel_execution_disabled,
+            bal_parallel_state_root_disabled,
+            disable_bal_batch_io: false,
             #[cfg(feature = "trie-debug")]
             proof_jitter: None,
         }
@@ -610,7 +646,10 @@ impl EngineArgs {
             .with_share_sparse_trie_with_payload_builder(
                 self.share_sparse_trie_with_payload_builder,
             )
-            .with_suppress_persistence_during_build(self.suppress_persistence_during_build);
+            .with_suppress_persistence_during_build(self.suppress_persistence_during_build)
+            .without_bal_parallel_execution(self.bal_parallel_execution_disabled)
+            .without_bal_parallel_state_root(self.bal_parallel_state_root_disabled)
+            .without_bal_batch_io(self.disable_bal_batch_io);
         #[cfg(feature = "trie-debug")]
         let config = config.with_proof_jitter(self.proof_jitter);
         config
@@ -672,6 +711,9 @@ mod tests {
             share_execution_cache_with_payload_builder: false,
             share_sparse_trie_with_payload_builder: false,
             suppress_persistence_during_build: false,
+            bal_parallel_execution_disabled: true,
+            bal_parallel_state_root_disabled: true,
+            disable_bal_batch_io: true,
             #[cfg(feature = "trie-debug")]
             proof_jitter: None,
         };
@@ -714,6 +756,9 @@ mod tests {
             "--engine.disable-sparse-trie-cache-pruning",
             "--engine.state-root-task-timeout",
             "2s",
+            "--engine.disable-bal-parallel-execution",
+            "--engine.disable-bal-parallel-state-root",
+            "--engine.disable-bal-batch-io",
         ])
         .args;
 
diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx
index 6f3f18398ef..604f9631f70 100644
--- a/docs/vocs/docs/pages/cli/reth/node.mdx
+++ b/docs/vocs/docs/pages/cli/reth/node.mdx
@@ -1008,7 +1008,15 @@ Engine:
 
       --engine.disable-cache-metrics
           Disable cache metrics recording, which can take up to 50ms with large cached state
+      --engine.disable-bal-parallel-execution
+          Disable BAL (Block Access List, EIP-7928) based parallel execution. When set, falls back to transaction-based prewarming even when a BAL is available
 
+      --engine.disable-bal-parallel-state-root
+          Disable BAL-driven parallel state root computation. When set, the BAL hashed post state is not sent to the multiproof task for early parallel state root computation
+
+      --engine.disable-bal-batch-io
+          Disable BAL (Block Access List) batched IO during prewarming. When set, falls back to individual per-slot storage reads instead of batched cursor reads
+          
       --engine.sparse-trie-max-hot-slots <SPARSE_TRIE_MAX_HOT_SLOTS>
           LFU hot-slot capacity: max storage slots retained across sparse trie prune cycles
 

From 915211181f890ce99a5fd8e1f35ec36c02a8db01 Mon Sep 17 00:00:00 2001
From: Ishika Choudhury <117741714+Rimeeeeee@users.noreply.github.com>
Date: Tue, 21 Apr 2026 14:44:11 +0530
Subject: [PATCH 2/4] chore(book): move BAL flag docs to the end of the Engine section

---
 docs/vocs/docs/pages/cli/reth/node.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx
index 604f9631f70..b0f7accc7db 100644
--- a/docs/vocs/docs/pages/cli/reth/node.mdx
+++ b/docs/vocs/docs/pages/cli/reth/node.mdx
@@ -1008,15 +1008,6 @@ Engine:
 
       --engine.disable-cache-metrics
           Disable cache metrics recording, which can take up to 50ms with large cached state
-      --engine.disable-bal-parallel-execution
-          Disable BAL (Block Access List, EIP-7928) based parallel execution. When set, falls back to transaction-based prewarming even when a BAL is available
-
-      --engine.disable-bal-parallel-state-root
-          Disable BAL-driven parallel state root computation. When set, the BAL hashed post state is not sent to the multiproof task for early parallel state root computation
-
-      --engine.disable-bal-batch-io
-          Disable BAL (Block Access List) batched IO during prewarming. When set, falls back to individual per-slot storage reads instead of batched cursor reads
-          
       --engine.sparse-trie-max-hot-slots <SPARSE_TRIE_MAX_HOT_SLOTS>
           LFU hot-slot capacity: max storage slots retained across sparse trie prune cycles
 
@@ -1069,6 +1060,15 @@ Engine:
 
           When enabled, persistence cycles are deferred from the moment an FCU with payload attributes arrives until the next FCU clears the build. Useful on chains with short block times where persistence I/O can interfere with block building latency.
 
+      --engine.disable-bal-parallel-execution
+          Disable BAL (Block Access List, EIP-7928) based parallel execution. When set, falls back to transaction-based prewarming even when a BAL is available
+
+      --engine.disable-bal-parallel-state-root
+          Disable BAL-driven parallel state root computation. When set, the BAL hashed post state is not sent to the multiproof task for early parallel state root computation
+
+      --engine.disable-bal-batch-io
+          Disable BAL (Block Access List) batched IO during prewarming. When set, falls back to individual per-slot storage reads instead of batched cursor reads
+
 ERA:
       --era.enable
           Enable import from ERA1 files

From 88891cbd1f1c8996d1c2d342caed035816356093 Mon Sep 17 00:00:00 2001
From: Ishika Choudhury <117741714+Rimeeeeee@users.noreply.github.com>
Date: Tue, 21 Apr 2026 15:04:08 +0530
Subject: [PATCH 3/4] chore(book): restore blank line after --engine.disable-cache-metrics

---
 docs/vocs/docs/pages/cli/reth/node.mdx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx
index b0f7accc7db..b933b9cee49 100644
--- a/docs/vocs/docs/pages/cli/reth/node.mdx
+++ b/docs/vocs/docs/pages/cli/reth/node.mdx
@@ -1008,6 +1008,7 @@ Engine:
 
       --engine.disable-cache-metrics
           Disable cache metrics recording, which can take up to 50ms with large cached state
+      
       --engine.sparse-trie-max-hot-slots <SPARSE_TRIE_MAX_HOT_SLOTS>
           LFU hot-slot capacity: max storage slots retained across sparse trie prune cycles
 

From 04aba619ca05c885ed75a18efc2260f4948e25c1 Mon Sep 17 00:00:00 2001
From: Ishika Choudhury <117741714+Rimeeeeee@users.noreply.github.com>
Date: Tue, 21 Apr 2026 15:09:33 +0530
Subject: [PATCH 4/4] chore(book): strip trailing whitespace from the restored blank line

---
 docs/vocs/docs/pages/cli/reth/node.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/vocs/docs/pages/cli/reth/node.mdx b/docs/vocs/docs/pages/cli/reth/node.mdx
index b933b9cee49..683a30cd618 100644
--- a/docs/vocs/docs/pages/cli/reth/node.mdx
+++ b/docs/vocs/docs/pages/cli/reth/node.mdx
@@ -1008,7 +1008,7 @@ Engine:
 
       --engine.disable-cache-metrics
           Disable cache metrics recording, which can take up to 50ms with large cached state
-      
+
       --engine.sparse-trie-max-hot-slots <SPARSE_TRIE_MAX_HOT_SLOTS>
           LFU hot-slot capacity: max storage slots retained across sparse trie prune cycles