Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions cmd/ethrex/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,51 @@ pub struct Options {
help_heading = "Node options"
)]
pub force: bool,
// Size of RocksDB's shared block cache, in bytes. Settable with
// `--rocksdb.block-cache-size` or the `ETHREX_ROCKSDB_BLOCK_CACHE_SIZE` environment
// variable; the default is `ethrex_storage::DEFAULT_ROCKSDB_BLOCK_CACHE_SIZE_BYTES`.
// NOTE(review): the long help calls this value the effective upper bound on RocksDB's
// resident memory, yet it also lists memtables as memory used on top of the cache —
// confirm the wording (it presumably bounds block/index/filter memory only).
// NOTE(review): the field is `usize` and the documented default is 21474836480, which
// presumably does not fit in `usize` on 32-bit targets — confirm those are unsupported.
#[arg(
long = "rocksdb.block-cache-size",
value_name = "BYTES",
default_value_t = ethrex_storage::DEFAULT_ROCKSDB_BLOCK_CACHE_SIZE_BYTES,
help = "RocksDB shared block cache size in bytes (default 20 GiB). \
Lowering this degrades block-import throughput; see --help for details.",
long_help = "RocksDB shared block cache size in bytes. This single bounded LRU cache \
holds both data blocks AND the per-SST index and bloom-filter blocks \
needed to look them up. Because ethrex enables cache_index_and_filter_blocks, \
this value is the effective upper bound on RocksDB's resident memory footprint.\n\
\n\
Default: 21474836480 bytes (20 GiB). Sized generously on purpose, to comfortably \
hold the filter and index working set on a fully-synced mainnet node (~5 GiB) \
plus the EVM's hot data set during block execution. Total ethrex process \
resident memory at this default is roughly 25-28 GiB on a fully-synced mainnet \
node (cache + memtables + per-block working memory).\n\
\n\
LOWERING THIS VALUE WILL DEGRADE BLOCK-IMPORT PERFORMANCE. The filter and \
index working set is essentially fixed; when the cache cannot hold it plus a \
useful amount of hot data, EVM state reads spill to disk on every miss and \
block execution slows down sharply. Measured on a synced mainnet node:\n\
- 4 GiB cache : ~76% slower than the unbounded baseline (filters monopolize \
the cache, data is constantly evicted)\n\
- 20 GiB cache: at parity with the unbounded baseline (filters + working \
set both fit comfortably; this is the default)\n\
\n\
When to keep or raise the default:\n\
- Production mainnet validators and any node where block-import throughput \
matters should keep the default. Spare RAM is otherwise unused; the cache \
fills lazily up to this ceiling.\n\
- Future-proofing for databases beyond ~1 TB: raising this above 20 GiB \
leaves headroom as the on-disk state grows.\n\
\n\
When to lower it:\n\
- Resource-constrained hosts (testnet nodes, dev VMs, small validators) where \
the lower memory ceiling is worth the throughput cost. The lower bound for \
keeping up with mainnet under head-following load is workload-dependent and \
not currently characterized below 4 GiB.\n\
\n\
Value is in bytes. Example: 21474836480 = 20 * 1024^3 = 20 GiB. The \
ETHREX_ROCKSDB_BLOCK_CACHE_SIZE environment variable has the same effect.",
help_heading = "Storage options",
env = "ETHREX_ROCKSDB_BLOCK_CACHE_SIZE",
)]
pub rocksdb_block_cache_size: usize,
#[arg(long = "syncmode", default_value = "snap", value_name = "SYNC_MODE", value_parser = utils::parse_sync_mode, help = "The way in which the node will sync its state.", long_help = "Can be either \"full\" or \"snap\" with \"snap\" as default value.", help_heading = "P2P options", env = "ETHREX_SYNCMODE")]
pub syncmode: SyncMode,
#[arg(
Expand Down Expand Up @@ -457,6 +502,7 @@ impl Default for Options {
network: Default::default(),
bootnodes: Default::default(),
datadir: Default::default(),
rocksdb_block_cache_size: ethrex_storage::DEFAULT_ROCKSDB_BLOCK_CACHE_SIZE_BYTES,
syncmode: Default::default(),
metrics_addr: "0.0.0.0".to_owned(),
metrics_port: Default::default(),
Expand Down
48 changes: 39 additions & 9 deletions cmd/ethrex/initializers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ use ethrex_p2p::{
types::{NetworkConfig, Node, NodeRecord},
utils::public_key_from_signing_key,
};
use ethrex_storage::{EngineType, Store, error::StoreError, has_valid_db, read_chain_id_from_db};
use ethrex_storage::{
EngineType, Store, StoreConfig, error::StoreError, has_valid_db, read_chain_id_from_db,
};
use local_ip_address::{local_ip, local_ipv6};
use rand::rngs::OsRng;
use secp256k1::SecretKey;
Expand Down Expand Up @@ -143,30 +145,55 @@ pub fn init_metrics(opts: &Options, network: &Network, tracker: TaskTracker) {
tracker.spawn(metrics_api);
}

/// Opens a new or pre-existing Store with default tunables and loads the initial
/// state provided by the network. See [`init_store_with_config`] for the variant
/// that lets production callers thread CLI-provided storage tunables through.
///
/// This is a thin wrapper: it forwards `datadir` and `genesis` to
/// [`init_store_with_config`] together with `StoreConfig::default()`, so the store
/// is opened exactly once, inside that call.
///
/// # Errors
///
/// Returns whatever [`StoreError`] `init_store_with_config` reports.
pub async fn init_store(datadir: impl AsRef<Path>, genesis: Genesis) -> Result<Store, StoreError> {
    init_store_with_config(datadir, genesis, StoreConfig::default()).await
}

/// Opens the Store located at `datadir` using the given [`StoreConfig`], then feeds it
/// the initial state described by `genesis`.
///
/// # Errors
///
/// Propagates the [`StoreError`] from opening the store or from adding the initial state.
pub async fn init_store_with_config(
    datadir: impl AsRef<Path>,
    genesis: Genesis,
    config: StoreConfig,
) -> Result<Store, StoreError> {
    let path = datadir.as_ref();
    let mut opened = open_store_with_config(path, config)?;
    opened.add_initial_state(genesis).await?;
    Ok(opened)
}

/// Initializes a pre-existing Store with default tunables. See [`load_store_with_config`].
///
/// Forwards to [`load_store_with_config`] with `StoreConfig::default()`; the store is
/// opened only by that call, never separately here.
///
/// # Errors
///
/// Returns whatever [`StoreError`] `load_store_with_config` reports.
pub async fn load_store(datadir: &Path) -> Result<Store, StoreError> {
    load_store_with_config(datadir, StoreConfig::default()).await
}

/// Opens the Store at `datadir` with the given [`StoreConfig`] and loads its initial
/// state before handing it back.
///
/// # Errors
///
/// Propagates the [`StoreError`] from opening the store or from loading the initial state.
pub async fn load_store_with_config(
    datadir: &Path,
    config: StoreConfig,
) -> Result<Store, StoreError> {
    let opened = open_store_with_config(datadir, config)?;
    opened.load_initial_state().await?;
    Ok(opened)
}

/// Opens a pre-existing Store or creates a new one with default tunables.
/// See [`open_store_with_config`].
///
/// Equivalent to calling `open_store_with_config` with `StoreConfig::default()`.
pub fn open_store(datadir: &Path) -> Result<Store, StoreError> {
open_store_with_config(datadir, StoreConfig::default())
}

/// Opens a pre-existing Store or creates a new one, applying the supplied [`StoreConfig`].
///
/// When `is_memory_datadir(datadir)` holds, the in-memory engine is used; otherwise the
/// RocksDB engine is selected. `config` is forwarded to `Store::new_with_config` in both
/// branches, so each branch constructs the store exactly once.
///
/// # Errors
///
/// Returns the [`StoreError`] produced by `Store::new_with_config`.
pub fn open_store_with_config(datadir: &Path, config: StoreConfig) -> Result<Store, StoreError> {
    if is_memory_datadir(datadir) {
        Store::new_with_config(datadir, EngineType::InMemory, config)
    } else {
        // NOTE(review): `engine_type` is only bound when the `rocksdb` feature is enabled,
        // so this branch appears to need that feature in order to compile — confirm.
        #[cfg(feature = "rocksdb")]
        let engine_type = EngineType::RocksDB;
        // With the `metrics` feature on, record the data directory for process metrics.
        #[cfg(feature = "metrics")]
        ethrex_metrics::process::set_datadir_path(datadir.to_path_buf());
        Store::new_with_config(datadir, engine_type, config)
    }
}

Expand Down Expand Up @@ -499,7 +526,10 @@ pub async fn init_l1(
debug!("Preloading KZG trusted setup");
ethrex_crypto::kzg::warm_up_trusted_setup();

let store = match init_store(&datadir, genesis).await {
let store_config = StoreConfig {
rocksdb_block_cache_size: opts.rocksdb_block_cache_size,
};
let store = match init_store_with_config(&datadir, genesis, store_config).await {
Ok(store) => store,
Err(err @ StoreError::IncompatibleDBVersion { .. })
| Err(err @ StoreError::NotFoundDBVersion) => {
Expand Down
9 changes: 7 additions & 2 deletions cmd/ethrex/l2/initializers.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use crate::cli::Options as L1Options;
use crate::initializers::{
self, get_authrpc_socket_addr, get_http_socket_addr, get_local_node_record, get_local_p2p_node,
get_network, get_signer, get_ws_socket_addr, init_blockchain, init_network, init_store,
get_network, get_signer, get_ws_socket_addr, init_blockchain, init_network,
init_store_with_config,
};
use ethrex_storage::StoreConfig;
use crate::l2::{L2Options, SequencerOptions};
use crate::utils::{
NodeConfigFile, get_client_version, get_client_version_string, init_datadir,
Expand Down Expand Up @@ -200,7 +202,10 @@ pub async fn init_l2(
let network = get_network(&opts.node_opts);

let genesis = network.get_genesis()?;
let store = init_store(&datadir, genesis.clone()).await?;
let store_config = StoreConfig {
rocksdb_block_cache_size: opts.node_opts.rocksdb_block_cache_size,
};
let store = init_store_with_config(&datadir, genesis.clone(), store_config).await?;
let rollup_store = init_rollup_store(&rollup_store_dir).await;

let operator_fee_config = get_operator_fee_config(&opts.sequencer_opts)?;
Expand Down
42 changes: 31 additions & 11 deletions crates/storage/backend/rocksdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pub struct RocksDBBackend {
}

impl RocksDBBackend {
pub fn open(path: impl AsRef<Path>) -> Result<Self, StoreError> {
pub fn open(path: impl AsRef<Path>, block_cache_size: usize) -> Result<Self, StoreError> {
// Rocksdb optimizations options
let mut opts = Options::default();
opts.create_if_missing(true);
Expand Down Expand Up @@ -84,9 +84,29 @@ impl RocksDBBackend {
all_cfs_to_open.extend(existing_cfs.iter().cloned());
all_cfs_to_open.extend(TABLES.iter().map(|table| table.to_string()));

// Shared block cache for all column families: caches decompressed SST data
// blocks in userspace, reducing kernel I/O for hot data (trie nodes, accounts).
let block_cache = Cache::new_lru_cache(4 * 1024 * 1024 * 1024); // 4GB
// Shared block cache for all column families. With
// `cache_index_and_filter_blocks(true)` below, this cache holds both data blocks
// and the index/bloom-filter blocks needed to look them up, so its size is the
// effective ceiling on RocksDB's resident memory footprint. The caller chooses
// the size (see the `--rocksdb.block-cache-size` CLI flag); a value that is too
// small relative to the filter + working-set size will degrade block-import
// throughput (filter blocks displace data blocks, EVM reads spill to disk).
let block_cache = Cache::new_lru_cache(block_cache_size);

// Configures a CF's block-based table to keep its index and bloom-filter blocks
// inside the shared (bounded) block cache rather than pinning them per open file.
//
// With `max_open_files(-1)` every SST stays open, and RocksDB's default
// (`cache_index_and_filter_blocks = false`) pins each file's index + filter blocks
// in heap for the lifetime of the reader. On a large state DB this grows without
// bound with the number of SST files (on a 490 GB mainnet DB the pinned filters
// alone reached ~6 GB). Caching them instead bounds total table memory to the block
// cache size; pinning L0 keeps the hottest level resident to avoid a read-latency cliff.
let configure_block_cache = |block_opts: &mut BlockBasedOptions| {
block_opts.set_block_cache(&block_cache);
block_opts.set_cache_index_and_filter_blocks(true);
block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
};

let mut cf_descriptors = Vec::new();
for cf_name in &all_cfs_to_open {
Expand All @@ -110,7 +130,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB blocks
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
CANONICAL_BLOCK_HASHES | BLOCK_NUMBERS => {
Expand All @@ -121,7 +141,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false);
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_TRIE_NODES | STORAGE_TRIE_NODES => {
Expand All @@ -134,7 +154,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false); // 10 bits per key
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_FLATKEYVALUE | STORAGE_FLATKEYVALUE => {
Expand All @@ -147,7 +167,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false); // 10 bits per key
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_CODES => {
Expand All @@ -162,7 +182,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
RECEIPTS_V2 => {
Expand All @@ -172,7 +192,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
_ => {
Expand All @@ -183,7 +203,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024);
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/storage/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ pub mod utils;

pub use layering::apply_prefix;
pub use store::{
AccountUpdatesList, EngineType, Store, UpdateBatch, has_valid_db, hash_address, hash_key,
read_chain_id_from_db,
AccountUpdatesList, DEFAULT_ROCKSDB_BLOCK_CACHE_SIZE_BYTES, EngineType, Store, StoreConfig,
UpdateBatch, has_valid_db, hash_address, hash_key, read_chain_id_from_db,
};

/// Store Schema Version, must be updated on any breaking change.
Expand Down
Loading
Loading