Skip to content
Merged
29 changes: 22 additions & 7 deletions crates/storage/backend/rocksdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,21 @@ impl RocksDBBackend {
// blocks in userspace, reducing kernel I/O for hot data (trie nodes, accounts).
let block_cache = Cache::new_lru_cache(4 * 1024 * 1024 * 1024); // 4GB

// Configures a CF's block-based table to keep its index and bloom-filter blocks
// inside the shared (bounded) block cache rather than pinning them per open file.
//
// With `max_open_files(-1)` every SST stays open, and RocksDB's default
// (`cache_index_and_filter_blocks = false`) pins each file's index + filter blocks
// in heap for the lifetime of the reader. On a large state DB this grows without
// bound with the number of SST files (on a 490 GB mainnet DB the pinned filters
// alone reached ~6 GB). Caching them instead bounds total table memory to the block
// cache size; pinning L0 keeps the hottest level resident to avoid a read-latency cliff.
// Applies the shared block-cache settings to one column family's block-based table options.
let configure_block_cache = |block_opts: &mut BlockBasedOptions| {
// Attach the single `block_cache` created above so every CF draws from the same bounded cache.
block_opts.set_block_cache(&block_cache);
// Store index and bloom-filter blocks in the block cache instead of pinning them per open file.
block_opts.set_cache_index_and_filter_blocks(true);
// Keep L0's filter/index blocks pinned so the hottest level stays resident.
block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
};
Comment on lines +113 to +117

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Without set_cache_index_and_filter_blocks_with_high_priority(true) (paired with a high_pri_pool_ratio on the cache), all non-L0 filter/index blocks compete equally with data blocks in the LRU. Under a read-heavy workload that touches many L1+ SST files (e.g. state queries or a fast-sync replay), data blocks can displace filter/index blocks from the cache, increasing filter misses and triggering extra disk reads — the same class of performance degradation the PR fixes for the baseline. Adding high-priority routing for index/filter blocks gives the eviction policy a knob to keep them resident without growing the cache.

Suggested change
let configure_block_cache = |block_opts: &mut BlockBasedOptions| {
block_opts.set_block_cache(&block_cache);
block_opts.set_cache_index_and_filter_blocks(true);
block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
};
let configure_block_cache = |block_opts: &mut BlockBasedOptions| {
block_opts.set_block_cache(&block_cache);
block_opts.set_cache_index_and_filter_blocks(true);
block_opts.set_cache_index_and_filter_blocks_with_high_priority(true);
block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
};
Prompt To Fix With AI
This is a comment left during a code review.
Path: crates/storage/backend/rocksdb.rs
Line: 113-117

Comment:
Without `set_cache_index_and_filter_blocks_with_high_priority(true)` (paired with a `high_pri_pool_ratio` on the cache), all non-L0 filter/index blocks compete equally with data blocks in the LRU. Under a read-heavy workload that touches many L1+ SST files (e.g. state queries or a fast-sync replay), data blocks can displace filter/index blocks from the cache, increasing filter misses and triggering extra disk reads — the same class of performance degradation the PR fixes for the baseline. Adding high-priority routing for index/filter blocks gives the eviction policy a knob to keep them resident without growing the cache.

```suggestion
        let configure_block_cache = |block_opts: &mut BlockBasedOptions| {
            block_opts.set_block_cache(&block_cache);
            block_opts.set_cache_index_and_filter_blocks(true);
            block_opts.set_cache_index_and_filter_blocks_with_high_priority(true);
            block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true);
        };
```

How can I resolve this? If you propose a fix, please make it concise.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other comment: those two options already have default values.


let mut cf_descriptors = Vec::new();
for cf_name in &all_cfs_to_open {
let mut cf_opts = Options::default();
Expand All @@ -110,7 +125,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB blocks
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
CANONICAL_BLOCK_HASHES | BLOCK_NUMBERS => {
Expand All @@ -121,7 +136,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false);
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_TRIE_NODES | STORAGE_TRIE_NODES => {
Expand All @@ -134,7 +149,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false); // 10 bits per key
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_FLATKEYVALUE | STORAGE_FLATKEYVALUE => {
Expand All @@ -147,7 +162,7 @@ impl RocksDBBackend {
let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024); // 16KB
block_opts.set_bloom_filter(10.0, false); // 10 bits per key
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
ACCOUNT_CODES => {
Expand All @@ -162,7 +177,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
RECEIPTS_V2 => {
Expand All @@ -172,7 +187,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(32 * 1024); // 32KB
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
_ => {
Expand All @@ -183,7 +198,7 @@ impl RocksDBBackend {

let mut block_opts = BlockBasedOptions::default();
block_opts.set_block_size(16 * 1024);
block_opts.set_block_cache(&block_cache);
configure_block_cache(&mut block_opts);
cf_opts.set_block_based_table_factory(&block_opts);
}
}
Expand Down
Loading