From fda18c9645eb2235e1126f52a268d36c7b5a26cf Mon Sep 17 00:00:00 2001
From: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
Date: Fri, 16 May 2025 17:22:32 +0300
Subject: [PATCH 1/4] Use hashbrown hashmap/hashset in validation context

Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
---
 Cargo.lock                                        | 14 ++++++++------
 Cargo.toml                                        |  1 +
 cumulus/pallets/parachain-system/Cargo.toml       |  1 +
 .../src/validate_block/trie_cache.rs              | 15 +++++++--------
 .../src/validate_block/trie_recorder.rs           | 15 +++++++--------
 5 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4a5259c05fe62..929e42f10ad63 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -179,7 +179,7 @@ dependencies = [
  "const-hex",
  "derive_more 1.0.0",
  "foldhash",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.3",
  "hex-literal",
  "indexmap 2.9.0",
  "itoa",
@@ -506,7 +506,7 @@ dependencies = [
  "ark-std 0.5.0",
  "educe",
  "fnv",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.3",
  "itertools 0.13.0",
  "num-bigint",
  "num-integer",
@@ -740,7 +740,7 @@ dependencies = [
  "ark-std 0.5.0",
  "educe",
  "fnv",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.3",
 ]
 
 [[package]]
@@ -4523,6 +4523,7 @@ dependencies = [
  "frame-support",
  "frame-system",
  "futures",
+ "hashbrown 0.15.3",
  "hex-literal",
  "impl-trait-for-tuples",
  "log",
@@ -7524,11 +7525,12 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.2"
+version = "0.15.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
+checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3"
 dependencies = [
  "allocator-api2",
+ "equivalent",
  "foldhash",
  "serde",
 ]
@@ -8270,7 +8272,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.3",
  "serde",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index 3c25cdda2bd27..345f515781636 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -829,6 +829,7 @@ governor = { version = "0.6.0" }
 gum = { path = "polkadot/node/gum", default-features = false, package = "tracing-gum" }
 gum-proc-macro = { path = "polkadot/node/gum/proc-macro", default-features = false, package = "tracing-gum-proc-macro" }
 handlebars = { version = "5.1.0" }
+hashbrown = "0.15.3"
 hash-db = { version = "0.16.0", default-features = false }
 hash256-std-hasher = { version = "0.15.2", default-features = false }
 hex = { version = "0.4.3", default-features = false }
diff --git a/cumulus/pallets/parachain-system/Cargo.toml b/cumulus/pallets/parachain-system/Cargo.toml
index 9752abe2914ea..fa8a0c11af921 100644
--- a/cumulus/pallets/parachain-system/Cargo.toml
+++ b/cumulus/pallets/parachain-system/Cargo.toml
@@ -15,6 +15,7 @@ workspace = true
 bytes = { workspace = true }
 codec = { features = ["derive"], workspace = true }
 environmental = { workspace = true }
+hashbrown = { workspace = true }
 impl-trait-for-tuples = { workspace = true }
 log = { workspace = true }
 scale-info = { features = ["derive"], workspace = true }
diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs
index 9590af993e9f9..26c338a3df2b1 100644
--- a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs
+++ b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs
@@ -15,11 +15,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use alloc::{
-	boxed::Box,
-	collections::btree_map::{BTreeMap, Entry},
-};
+use alloc::boxed::Box;
+
 use core::cell::{RefCell, RefMut};
+use hashbrown::{hash_map::Entry, HashMap};
 use sp_state_machine::TrieCacheProvider;
 use sp_trie::NodeCodec;
 use trie_db::{node::NodeOwned, Hasher};
@@ -28,8 +27,8 @@ use trie_db::{node::NodeOwned, Hasher};
 /// of values. To be used in `validate_block` to serve values and nodes that
 /// have already been loaded and decoded from the storage proof.
 pub(crate) struct TrieCache<'a, H: Hasher> {
-	node_cache: RefMut<'a, BTreeMap<H::Out, NodeOwned<H::Out>>>,
-	value_cache: Option<RefMut<'a, BTreeMap<Box<[u8]>, trie_db::CachedValue<H::Out>>>>,
+	node_cache: RefMut<'a, HashMap<H::Out, NodeOwned<H::Out>>>,
+	value_cache: Option<RefMut<'a, HashMap<Box<[u8]>, trie_db::CachedValue<H::Out>>>>,
 }
 
 impl<'a, H: Hasher> trie_db::TrieCache<NodeCodec<H>> for TrieCache<'a, H> {
@@ -66,14 +65,14 @@ impl<'a, H: Hasher> trie_db::TrieCache<NodeCodec<H>> for TrieCache<'a, H> {
 
 /// Provider of [`TrieCache`] instances.
 pub(crate) struct CacheProvider<H: Hasher> {
-	node_cache: RefCell<BTreeMap<H::Out, NodeOwned<H::Out>>>,
+	node_cache: RefCell<HashMap<H::Out, NodeOwned<H::Out>>>,
 	/// Cache: `storage_root` => `storage_key` => `value`.
 	///
 	/// One `block` can for example use multiple tries (child tries) and we need to distinguish the
 	/// cached (`storage_key`, `value`) between them. For this we are using the `storage_root` to
 	/// distinguish them (even if the storage root is the same for two child tries, it just means
 	/// that both are exactly the same trie and there would happen no collision).
-	value_cache: RefCell<BTreeMap<H::Out, BTreeMap<Box<[u8]>, trie_db::CachedValue<H::Out>>>>,
+	value_cache: RefCell<HashMap<H::Out, HashMap<Box<[u8]>, trie_db::CachedValue<H::Out>>>>,
 }
 
 impl<H: Hasher> CacheProvider<H> {
diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs
index c164cebd351f1..7413ba3c4960b 100644
--- a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs
+++ b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs
@@ -22,11 +22,10 @@
 
 use codec::Encode;
 
-use alloc::{
-	collections::{btree_map::BTreeMap, btree_set::BTreeSet},
-	rc::Rc,
-};
+use alloc::rc::Rc;
+
 use core::cell::{RefCell, RefMut};
+use hashbrown::{HashMap, HashSet};
 use sp_trie::{NodeCodec, ProofSizeProvider, StorageProof};
 use trie_db::{Hasher, RecordedForKey, TrieAccess};
 
@@ -35,9 +34,9 @@ use trie_db::{Hasher, RecordedForKey, TrieAccess};
 /// The internal size counting logic should align
 /// with ['sp_trie::recorder::Recorder'].
 pub(crate) struct SizeOnlyRecorder<'a, H: Hasher> {
-	seen_nodes: RefMut<'a, BTreeSet<H::Out>>,
+	seen_nodes: RefMut<'a, HashSet<H::Out>>,
 	encoded_size: RefMut<'a, usize>,
-	recorded_keys: RefMut<'a, BTreeMap<Rc<[u8]>, RecordedForKey>>,
+	recorded_keys: RefMut<'a, HashMap<Rc<[u8]>, RecordedForKey>>,
 }
 
 impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder<H::Out> for SizeOnlyRecorder<'a, H> {
@@ -91,9 +90,9 @@ impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder<H::Out> for SizeOnlyRecorder<
 
 #[derive(Clone)]
 pub(crate) struct SizeOnlyRecorderProvider<H: Hasher> {
-	seen_nodes: Rc<RefCell<BTreeSet<H::Out>>>,
+	seen_nodes: Rc<RefCell<HashSet<H::Out>>>,
 	encoded_size: Rc<RefCell<usize>>,
-	recorded_keys: Rc<RefCell<BTreeMap<Rc<[u8]>, RecordedForKey>>>,
+	recorded_keys: Rc<RefCell<HashMap<Rc<[u8]>, RecordedForKey>>>,
 }
 
 impl<H: Hasher> Default for SizeOnlyRecorderProvider<H> {

From b0ce4df6f5f976c621ca0f5a30161a87bf9cf020 Mon Sep 17 00:00:00 2001
From: "cmd[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 23 May 2025 13:06:10 +0000
Subject: [PATCH 2/4] Update from github-actions[bot] running command 'prdoc
 --audience node_dev --bump patch'

---
 prdoc/pr_8606.prdoc | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 prdoc/pr_8606.prdoc

diff --git a/prdoc/pr_8606.prdoc b/prdoc/pr_8606.prdoc
new file mode 100644
index 0000000000000..8a2b0db58a7a6
--- /dev/null
+++ b/prdoc/pr_8606.prdoc
@@ -0,0 +1,10 @@
+title: Use hashbrown hashmap/hashset in validation context
+doc:
+- audience: Node Dev
+  description: |-
+    Discovered while profiling https://github.com/paritytech/polkadot-sdk/issues/6131#issuecomment-2891523233 with the benchmark https://github.com/paritytech/polkadot-sdk/pull/8069 that  when running in validation a big chunk of the time is spent inserting and retrieving data from the BTreeMap/BTreeSet.
+
+    By switching to hashbrown HashMap/HashSet in validation TrieCache and TrieRecorder  and the memory-db https://github.com/paritytech/trie/pull/221 read costs improve with around ~40% and write with about ~20%
+crates:
+- name: cumulus-pallet-parachain-system
+  bump: patch

From 3b2ea5e9ed09a869b9cdc120d1e0c0d4eb7f49e7 Mon Sep 17 00:00:00 2001
From: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
Date: Fri, 23 May 2025 16:14:29 +0300
Subject: [PATCH 3/4] Make taplo happy

Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io>
---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 42c126302ae2a..b37bd3e7a3431 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -832,9 +832,9 @@ governor = { version = "0.6.0" }
 gum = { path = "polkadot/node/gum", default-features = false, package = "tracing-gum" }
 gum-proc-macro = { path = "polkadot/node/gum/proc-macro", default-features = false, package = "tracing-gum-proc-macro" }
 handlebars = { version = "5.1.0" }
-hashbrown = "0.15.3"
 hash-db = { version = "0.16.0", default-features = false }
 hash256-std-hasher = { version = "0.15.2", default-features = false }
+hashbrown = "0.15.3"
 hex = { version = "0.4.3", default-features = false }
 hex-literal = { version = "0.4.1", default-features = false }
 hkdf = { version = "0.12.0" }

From 4caa8d7776a944e551985cf4cb2f5d5c60b9e411 Mon Sep 17 00:00:00 2001
From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com>
Date: Fri, 23 May 2025 16:48:22 +0300
Subject: [PATCH 4/4] Update pr_8606.prdoc

---
 prdoc/pr_8606.prdoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/prdoc/pr_8606.prdoc b/prdoc/pr_8606.prdoc
index 8a2b0db58a7a6..493cad93dcc7b 100644
--- a/prdoc/pr_8606.prdoc
+++ b/prdoc/pr_8606.prdoc
@@ -7,4 +7,4 @@ doc:
     By switching to hashbrown HashMap/HashSet in validation TrieCache and TrieRecorder  and the memory-db https://github.com/paritytech/trie/pull/221 read costs improve with around ~40% and write with about ~20%
 crates:
 - name: cumulus-pallet-parachain-system
-  bump: patch
+  bump: minor