From dc7d0b0b3007c989c3eec39320e498c8080bae9d Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Mon, 9 Jun 2025 10:08:12 -0700 Subject: [PATCH 1/4] switch IrepNumbering caches to use FxHashMap --- Cargo.lock | 16 ++++++++ cprover_bindings/Cargo.toml | 1 + .../src/irep/goto_binary_serde.rs | 39 ++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a7c034c5b85a..af863b679354 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,6 +190,12 @@ dependencies = [ "which", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.10.1" @@ -406,6 +412,7 @@ dependencies = [ name = "cprover_bindings" version = "0.62.0" dependencies = [ + "fxhash", "lazy_static", "linear-map", "memuse", @@ -681,6 +688,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "getopts" version = "0.2.21" diff --git a/cprover_bindings/Cargo.toml b/cprover_bindings/Cargo.toml index 641d7862d776..d3213811a89d 100644 --- a/cprover_bindings/Cargo.toml +++ b/cprover_bindings/Cargo.toml @@ -20,6 +20,7 @@ serde = {version = "1", features = ["derive"]} string-interner = "0.19" tracing = "0.1" linear-map = {version = "1.2", features = ["serde_impl"]} +fxhash = "0.2.1" [dev-dependencies] serde_test = "1" diff --git a/cprover_bindings/src/irep/goto_binary_serde.rs b/cprover_bindings/src/irep/goto_binary_serde.rs index 8bc036b3c07f..62bca84340f5 100644 --- a/cprover_bindings/src/irep/goto_binary_serde.rs +++ b/cprover_bindings/src/irep/goto_binary_serde.rs @@ -80,6 +80,9 @@ use crate::irep::{Irep, IrepId, Symbol, SymbolTable}; use crate::{InternString, InternedString}; +#[cfg(not(test))] +use fxhash::FxHashMap; +#[cfg(test)] use std::collections::HashMap; use std::fs::File; use std::hash::Hash; @@ -199,6 +202,23 @@ impl IrepNumberingInv { } } +#[cfg(not(test))] +/// A numbering of [InternedString], [IrepId] and [Irep] based on their contents. +struct IrepNumbering { + /// Map from [InternedString] to their unique numbers. + string_cache: FxHashMap, + + /// Inverse string cache. + inv_string_cache: Vec, + + /// Map from [IrepKey] to their unique numbers. + cache: FxHashMap, + + /// Inverse cache, allows to get a NumberedIrep from its unique number. + inv_cache: IrepNumberingInv, +} + +#[cfg(test)] /// A numbering of [InternedString], [IrepId] and [Irep] based on their contents. struct IrepNumbering { /// Map from [InternedString] to their unique numbers. @@ -214,16 +234,31 @@ struct IrepNumbering { inv_cache: IrepNumberingInv, } +#[cfg(not(test))] impl IrepNumbering { fn new() -> Self { IrepNumbering { - string_cache: HashMap::new(), + string_cache: FxHashMap::default(), inv_string_cache: Vec::new(), - cache: HashMap::new(), + cache: FxHashMap::default(), inv_cache: IrepNumberingInv::new(), } } +} +#[cfg(test)] +impl IrepNumbering { + fn new() -> Self { + IrepNumbering { + string_cache: HashMap::default(), + inv_string_cache: Vec::new(), + cache: HashMap::default(), + inv_cache: IrepNumberingInv::new(), + } + } +} + +impl IrepNumbering { /// Returns a [NumberedString] from its number if it exists, None otherwise. fn numbered_string_from_number(&mut self, string_number: usize) -> Option { self.inv_string_cache.get(string_number).copied() From 40c399775f299edb724304a5376448f07655bb99 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Mon, 9 Jun 2025 10:11:12 -0700 Subject: [PATCH 2/4] use .entry() for irep cache initialization --- .../src/irep/goto_binary_serde.rs | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/cprover_bindings/src/irep/goto_binary_serde.rs b/cprover_bindings/src/irep/goto_binary_serde.rs index 62bca84340f5..fadcd0a98360 100644 --- a/cprover_bindings/src/irep/goto_binary_serde.rs +++ b/cprover_bindings/src/irep/goto_binary_serde.rs @@ -299,20 +299,18 @@ impl IrepNumbering { .map(|(key, value)| (self.number_irep_id(key).number, self.number_irep(value).number)) .collect(); let key = IrepKey::new(id, &sub, &named_sub); - self.get_or_insert(&key) + self.get_or_insert(key) } /// Gets the existing [NumberedIrep] from the [IrepKey] or inserts a fresh /// one and returns it. - fn get_or_insert(&mut self, key: &IrepKey) -> NumberedIrep { - if let Some(number) = self.cache.get(key) { - // Return the NumberedIrep from the inverse cache - return self.inv_cache.index[*number]; - } - // This is where the key gets its unique number assigned. - let number = self.inv_cache.add_key(key); - self.cache.insert(key.clone(), number); - self.inv_cache.index[number] + fn get_or_insert(&mut self, key: IrepKey) -> NumberedIrep { + let number = self.cache.entry(key).or_insert_with_key(|key| { + // This is where the key gets its unique number assigned. + self.inv_cache.add_key(key) + }); + + self.inv_cache.index[*number] } /// Returns the unique number of the `id` field of the given [NumberedIrep]. @@ -829,7 +827,7 @@ where let key = IrepKey::new(id, &sub, &named_sub); // Insert key in the numbering - let numbered = self.numbering.get_or_insert(&key); + let numbered = self.numbering.get_or_insert(key); // Map number from the binary to new number self.add_irep_mapping(irep_number, numbered.number); From 6ca757521a27a392c0e971ef1e6e4482a810a94d Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Mon, 9 Jun 2025 10:19:47 -0700 Subject: [PATCH 3/4] switch IrepId strings to be copy-on-write --- .../src/irep/goto_binary_serde.rs | 2 +- cprover_bindings/src/irep/irep_id.rs | 43 ++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/cprover_bindings/src/irep/goto_binary_serde.rs b/cprover_bindings/src/irep/goto_binary_serde.rs index fadcd0a98360..70f1c2bdff8f 100644 --- a/cprover_bindings/src/irep/goto_binary_serde.rs +++ b/cprover_bindings/src/irep/goto_binary_serde.rs @@ -283,7 +283,7 @@ impl IrepNumbering { /// Turns a [IrepId] to a [NumberedString]. The [IrepId] gets the number of its /// string representation. fn number_irep_id(&mut self, irep_id: &IrepId) -> NumberedString { - self.number_string(&irep_id.to_string().intern()) + self.number_string(&irep_id.to_string_cow().intern()) } /// Turns an [Irep] into a [NumberedIrep]. The [Irep] is recursively traversed diff --git a/cprover_bindings/src/irep/irep_id.rs b/cprover_bindings/src/irep/irep_id.rs index b3ca7a49ecab..4a975e3df40c 100644 --- a/cprover_bindings/src/irep/irep_id.rs +++ b/cprover_bindings/src/irep/irep_id.rs @@ -8,7 +8,7 @@ use crate::cbmc_string::InternedString; use crate::utils::NumUtils; use num::bigint::{BigInt, BigUint, Sign}; -use std::fmt::Display; +use std::borrow::Cow; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)] pub enum IrepId { @@ -877,22 +877,34 @@ impl IrepId { } } -impl Display for IrepId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// Implementing `ToString` rather than `Display` because display only has the interface +// to write to a formatter which seems to be slower when you just need the string. +#[allow(clippy::to_string_trait_impl)] +impl ToString for IrepId { + fn to_string(&self) -> String { + self.to_string_cow().into_owned() + } +} + +impl IrepId { + pub fn to_string_cow(&self) -> Cow { + match self.to_owned_string() { + Some(owned) => Cow::Owned(owned), + None => Cow::Borrowed(self.to_static_string()), + } + } + + fn to_owned_string(&self) -> Option { match self { - IrepId::FreeformString(s) => { - return write!(f, "{s}"); - } - IrepId::FreeformInteger(i) => { - return write!(f, "{i}"); - } - IrepId::FreeformBitPattern(i) => { - return write!(f, "{i:X}"); - } - _ => {} + IrepId::FreeformString(s) => Some(s.to_string()), + IrepId::FreeformInteger(i) => Some(i.to_string()), + IrepId::FreeformBitPattern(i) => Some(format!("{i:X}")), + _ => None, } + } - let s = match self { + fn to_static_string(&self) -> &'static str { + match self { IrepId::FreeformString(_) | IrepId::FreeformInteger(_) | IrepId::FreeformBitPattern { .. } => unreachable!(), @@ -1719,8 +1731,7 @@ impl Display for IrepId { IrepId::VectorGt => "vector->", IrepId::VectorLt => "vector-<", IrepId::FloatbvRoundToIntegral => "floatbv_round_to_integral", - }; - write!(f, "{s}") + } } } From 71263e917f18b9fd558899b988b01fa7f6eed233 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Wed, 11 Jun 2025 11:54:48 -0700 Subject: [PATCH 4/4] add code comment on why FxHashMap is only non-test --- cprover_bindings/src/irep/goto_binary_serde.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cprover_bindings/src/irep/goto_binary_serde.rs b/cprover_bindings/src/irep/goto_binary_serde.rs index 70f1c2bdff8f..864d4cc7a6f0 100644 --- a/cprover_bindings/src/irep/goto_binary_serde.rs +++ b/cprover_bindings/src/irep/goto_binary_serde.rs @@ -204,6 +204,9 @@ impl IrepNumberingInv { #[cfg(not(test))] /// A numbering of [InternedString], [IrepId] and [Irep] based on their contents. +/// Note that using [FxHashMap] makes our caches faster, but we still have to use +/// the default [HashMap] in a test context as only it implements the +/// `memuse::DynamicUsage` trait we need for memory profiling under test. struct IrepNumbering { /// Map from [InternedString] to their unique numbers. string_cache: FxHashMap, @@ -220,6 +223,7 @@ struct IrepNumbering { #[cfg(test)] /// A numbering of [InternedString], [IrepId] and [Irep] based on their contents. +/// See above for explanation of why this definition is only used in a test context. struct IrepNumbering { /// Map from [InternedString] to their unique numbers. string_cache: HashMap,