From fda18c9645eb2235e1126f52a268d36c7b5a26cf Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 16 May 2025 17:22:32 +0300 Subject: [PATCH 1/4] Use hashbrown hashmap/hashset in validation context Signed-off-by: Alexandru Gheorghe --- Cargo.lock | 14 ++++++++------ Cargo.toml | 1 + cumulus/pallets/parachain-system/Cargo.toml | 1 + .../src/validate_block/trie_cache.rs | 15 +++++++-------- .../src/validate_block/trie_recorder.rs | 15 +++++++-------- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a5259c05fe62..929e42f10ad63 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,7 +179,7 @@ dependencies = [ "const-hex", "derive_more 1.0.0", "foldhash", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "hex-literal", "indexmap 2.9.0", "itoa", @@ -506,7 +506,7 @@ dependencies = [ "ark-std 0.5.0", "educe", "fnv", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "itertools 0.13.0", "num-bigint", "num-integer", @@ -740,7 +740,7 @@ dependencies = [ "ark-std 0.5.0", "educe", "fnv", - "hashbrown 0.15.2", + "hashbrown 0.15.3", ] [[package]] @@ -4523,6 +4523,7 @@ dependencies = [ "frame-support", "frame-system", "futures", + "hashbrown 0.15.3", "hex-literal", "impl-trait-for-tuples", "log", @@ -7524,11 +7525,12 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", + "equivalent", "foldhash", "serde", ] @@ -8270,7 +8272,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 3c25cdda2bd27..345f515781636 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -829,6 +829,7 @@ governor = { version = "0.6.0" } gum = { path = "polkadot/node/gum", default-features = false, package = "tracing-gum" } gum-proc-macro = { path = "polkadot/node/gum/proc-macro", default-features = false, package = "tracing-gum-proc-macro" } handlebars = { version = "5.1.0" } +hashbrown = "0.15.3" hash-db = { version = "0.16.0", default-features = false } hash256-std-hasher = { version = "0.15.2", default-features = false } hex = { version = "0.4.3", default-features = false } diff --git a/cumulus/pallets/parachain-system/Cargo.toml b/cumulus/pallets/parachain-system/Cargo.toml index 9752abe2914ea..fa8a0c11af921 100644 --- a/cumulus/pallets/parachain-system/Cargo.toml +++ b/cumulus/pallets/parachain-system/Cargo.toml @@ -15,6 +15,7 @@ workspace = true bytes = { workspace = true } codec = { features = ["derive"], workspace = true } environmental = { workspace = true } +hashbrown = { workspace = true } impl-trait-for-tuples = { workspace = true } log = { workspace = true } scale-info = { features = ["derive"], workspace = true } diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs index 9590af993e9f9..26c338a3df2b1 100644 --- a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs +++ b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs @@ -15,11 +15,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use alloc::{ - boxed::Box, - collections::btree_map::{BTreeMap, Entry}, -}; +use alloc::boxed::Box; + use core::cell::{RefCell, RefMut}; +use hashbrown::{hash_map::Entry, HashMap}; use sp_state_machine::TrieCacheProvider; use sp_trie::NodeCodec; use trie_db::{node::NodeOwned, Hasher}; @@ -28,8 +27,8 @@ use trie_db::{node::NodeOwned, Hasher}; /// of values. To be used in `validate_block` to serve values and nodes that /// have already been loaded and decoded from the storage proof. pub(crate) struct TrieCache<'a, H: Hasher> { - node_cache: RefMut<'a, BTreeMap>>, - value_cache: Option, trie_db::CachedValue>>>, + node_cache: RefMut<'a, HashMap>>, + value_cache: Option, trie_db::CachedValue>>>, } impl<'a, H: Hasher> trie_db::TrieCache> for TrieCache<'a, H> { @@ -66,14 +65,14 @@ impl<'a, H: Hasher> trie_db::TrieCache> for TrieCache<'a, H> { /// Provider of [`TrieCache`] instances. pub(crate) struct CacheProvider { - node_cache: RefCell>>, + node_cache: RefCell>>, /// Cache: `storage_root` => `storage_key` => `value`. /// /// One `block` can for example use multiple tries (child tries) and we need to distinguish the /// cached (`storage_key`, `value`) between them. For this we are using the `storage_root` to /// distinguish them (even if the storage root is the same for two child tries, it just means /// that both are exactly the same trie and there would happen no collision). - value_cache: RefCell, trie_db::CachedValue>>>, + value_cache: RefCell, trie_db::CachedValue>>>, } impl CacheProvider { diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs index c164cebd351f1..7413ba3c4960b 100644 --- a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs +++ b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs @@ -22,11 +22,10 @@ use codec::Encode; -use alloc::{ - collections::{btree_map::BTreeMap, btree_set::BTreeSet}, - rc::Rc, -}; +use alloc::rc::Rc; + use core::cell::{RefCell, RefMut}; +use hashbrown::{HashMap, HashSet}; use sp_trie::{NodeCodec, ProofSizeProvider, StorageProof}; use trie_db::{Hasher, RecordedForKey, TrieAccess}; @@ -35,9 +34,9 @@ use trie_db::{Hasher, RecordedForKey, TrieAccess}; /// The internal size counting logic should align /// with ['sp_trie::recorder::Recorder']. pub(crate) struct SizeOnlyRecorder<'a, H: Hasher> { - seen_nodes: RefMut<'a, BTreeSet>, + seen_nodes: RefMut<'a, HashSet>, encoded_size: RefMut<'a, usize>, - recorded_keys: RefMut<'a, BTreeMap, RecordedForKey>>, + recorded_keys: RefMut<'a, HashMap, RecordedForKey>>, } impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder for SizeOnlyRecorder<'a, H> { @@ -91,9 +90,9 @@ impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder for SizeOnlyRecorder< #[derive(Clone)] pub(crate) struct SizeOnlyRecorderProvider { - seen_nodes: Rc>>, + seen_nodes: Rc>>, encoded_size: Rc>, - recorded_keys: Rc, RecordedForKey>>>, + recorded_keys: Rc, RecordedForKey>>>, } impl Default for SizeOnlyRecorderProvider { From b0ce4df6f5f976c621ca0f5a30161a87bf9cf020 Mon Sep 17 00:00:00 2001 From: "cmd[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 23 May 2025 13:06:10 +0000 Subject: [PATCH 2/4] Update from github-actions[bot] running command 'prdoc --audience node_dev --bump patch' --- prdoc/pr_8606.prdoc | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 prdoc/pr_8606.prdoc diff --git a/prdoc/pr_8606.prdoc b/prdoc/pr_8606.prdoc new file mode 100644 index 0000000000000..8a2b0db58a7a6 --- /dev/null +++ b/prdoc/pr_8606.prdoc @@ -0,0 +1,10 @@ +title: Use hashbrown hashmap/hashset in validation context +doc: +- audience: Node Dev + description: |- + Discovered while profiling https://github.com/paritytech/polkadot-sdk/issues/6131#issuecomment-2891523233 with the benchmark https://github.com/paritytech/polkadot-sdk/pull/8069 that when running in validation a big chunk of the time is spent inserting and retrieving data from the BTreeMap/BTreeSet. + + By switching to hashbrown HashMap/HashSet in validation TrieCache and TrieRecorder and the memory-db https://github.com/paritytech/trie/pull/221 read costs improve with around ~40% and write with about ~20% +crates: +- name: cumulus-pallet-parachain-system + bump: patch From 3b2ea5e9ed09a869b9cdc120d1e0c0d4eb7f49e7 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Fri, 23 May 2025 16:14:29 +0300 Subject: [PATCH 3/4] Make taplo happy Signed-off-by: Alexandru Gheorghe --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 42c126302ae2a..b37bd3e7a3431 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -832,9 +832,9 @@ governor = { version = "0.6.0" } gum = { path = "polkadot/node/gum", default-features = false, package = "tracing-gum" } gum-proc-macro = { path = "polkadot/node/gum/proc-macro", default-features = false, package = "tracing-gum-proc-macro" } handlebars = { version = "5.1.0" } -hashbrown = "0.15.3" hash-db = { version = "0.16.0", default-features = false } hash256-std-hasher = { version = "0.15.2", default-features = false } +hashbrown = "0.15.3" hex = { version = "0.4.3", default-features = false } hex-literal = { version = "0.4.1", default-features = false } hkdf = { version = "0.12.0" } From 4caa8d7776a944e551985cf4cb2f5d5c60b9e411 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Fri, 23 May 2025 16:48:22 +0300 Subject: [PATCH 4/4] Update pr_8606.prdoc --- prdoc/pr_8606.prdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prdoc/pr_8606.prdoc b/prdoc/pr_8606.prdoc index 8a2b0db58a7a6..493cad93dcc7b 100644 --- a/prdoc/pr_8606.prdoc +++ b/prdoc/pr_8606.prdoc @@ -7,4 +7,4 @@ doc: By switching to hashbrown HashMap/HashSet in validation TrieCache and TrieRecorder and the memory-db https://github.com/paritytech/trie/pull/221 read costs improve with around ~40% and write with about ~20% crates: - name: cumulus-pallet-parachain-system - bump: patch + bump: minor