From f37b211d192a20c3bf8ed7537ecd86cef4518709 Mon Sep 17 00:00:00 2001 From: sapphi-red <49056869+sapphi-red@users.noreply.github.com> Date: Sat, 4 Oct 2025 11:14:11 +0000 Subject: [PATCH] feat(allocator): add `HashSet` (#14212) Adds the arena-allocated `HashSet`. --- crates/oxc_allocator/src/hash_map.rs | 13 +- crates/oxc_allocator/src/hash_set.rs | 211 +++++++++++++++++++++++++++ crates/oxc_allocator/src/lib.rs | 3 + 3 files changed, 215 insertions(+), 12 deletions(-) create mode 100644 crates/oxc_allocator/src/hash_set.rs diff --git a/crates/oxc_allocator/src/hash_map.rs b/crates/oxc_allocator/src/hash_map.rs index 86f51c535f6d6..06dac053acd6f 100644 --- a/crates/oxc_allocator/src/hash_map.rs +++ b/crates/oxc_allocator/src/hash_map.rs @@ -49,7 +49,7 @@ type FxHashMap<'alloc, K, V> = hashbrown::HashMap(ManuallyDrop>); +pub struct HashMap<'alloc, K, V>(pub(crate) ManuallyDrop>); /// SAFETY: Even though `Bump` is not `Sync`, we can make `HashMap` `Sync` if both `K` and `V` /// are `Sync` because: @@ -285,14 +285,3 @@ where } // Note: `Index` and `Extend` are implemented via `Deref` - -/* -// Uncomment once we also provide `oxc_allocator::HashSet` -impl<'alloc, T> From> for HashSet<'alloc, T> { - fn from(map: HashMap<'alloc, T, ()>) -> Self { - let inner_map = ManuallyDrop::into_inner(map.0); - let inner_set = FxHashSet::from(inner_map); - Self(ManuallyDrop::new(inner_set)) - } -} -*/ diff --git a/crates/oxc_allocator/src/hash_set.rs b/crates/oxc_allocator/src/hash_set.rs new file mode 100644 index 0000000000000..4b0721ac5174f --- /dev/null +++ b/crates/oxc_allocator/src/hash_set.rs @@ -0,0 +1,211 @@ +//! A hash set without `Drop`, that uses [`FxHasher`] to hash keys, and stores data in arena allocator. +//! +//! See [`HashSet`] for more details. +//! +//! [`FxHasher`]: rustc_hash::FxHasher + +// All methods which just delegate to `hashbrown::HashSet` methods marked `#[inline(always)]` +#![expect(clippy::inline_always)] + +use std::{ + hash::Hash, + mem::ManuallyDrop, + ops::{Deref, DerefMut}, +}; + +use bumpalo::Bump; +use rustc_hash::FxBuildHasher; + +// Re-export additional types from `hashbrown` +pub use hashbrown::hash_set::{ + Difference, Drain, Entry, ExtractIf, Intersection, IntoIter, Iter, SymmetricDifference, Union, +}; + +use crate::{Allocator, HashMap}; + +type FxHashSet<'alloc, T> = hashbrown::HashSet; + +/// A hash set without `Drop`, that uses [`FxHasher`] to hash keys, and stores data in arena allocator. +/// +/// Just a thin wrapper around [`hashbrown::HashSet`], which disables the `Drop` implementation. +/// +/// All APIs are the same, except create a [`HashSet`] with +/// either [`new_in`](HashSet::new_in) or [`with_capacity_in`](HashSet::with_capacity_in). +/// +/// # No `Drop`s +/// +/// Objects allocated into Oxc memory arenas are never [`Dropped`](Drop). Memory is released in bulk +/// when the allocator is dropped, without dropping the individual objects in the arena. +/// +/// Therefore, it would produce a memory leak if you allocated [`Drop`] types into the arena +/// which own memory allocations outside the arena. +/// +/// Static checks make this impossible to do. [`HashSet::new_in`] and all other methods which create +/// a [`HashSet`] will refuse to compile if the key is a [`Drop`] type. +/// +/// [`FxHasher`]: rustc_hash::FxHasher +#[derive(Debug)] +pub struct HashSet<'alloc, T>(ManuallyDrop>); + +/// SAFETY: Same as `HashMap`. See `HashMap`'s doc comment for details. +unsafe impl Sync for HashSet<'_, T> {} + +// TODO: `IntoIter` and other consuming iterators provided by `hashbrown` are `Drop`. +// Wrap them in `ManuallyDrop` to prevent that. + +impl<'alloc, T> HashSet<'alloc, T> { + /// Const assertion that `T` is not `Drop`. + /// Must be referenced in all methods which create a `HashSet`. + const ASSERT_T_IS_NOT_DROP: () = { + assert!(!std::mem::needs_drop::(), "Cannot create a HashSet where T is a Drop type"); + }; + + /// Creates an empty [`HashSet`]. It will be allocated with the given allocator. + /// + /// The hash set is initially created with a capacity of 0, so it will not allocate + /// until it is first inserted into. + #[inline(always)] + pub fn new_in(allocator: &'alloc Allocator) -> Self { + const { Self::ASSERT_T_IS_NOT_DROP }; + + let inner = FxHashSet::with_hasher_in(FxBuildHasher, allocator.bump()); + Self(ManuallyDrop::new(inner)) + } + + /// Creates an empty [`HashSet`] with the specified capacity. It will be allocated with the given allocator. + /// + /// The hash set will be able to hold at least capacity elements without reallocating. + /// If capacity is 0, the hash set will not allocate. + #[inline(always)] + pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> Self { + const { Self::ASSERT_T_IS_NOT_DROP }; + + let inner = + FxHashSet::with_capacity_and_hasher_in(capacity, FxBuildHasher, allocator.bump()); + Self(ManuallyDrop::new(inner)) + } + + /// Create a new [`HashSet`] whose elements are taken from an iterator and allocated in the given `allocator`. + /// + /// This is behaviorially identical to [`FromIterator::from_iter`]. + #[inline] + pub fn from_iter_in>(iter: I, allocator: &'alloc Allocator) -> Self + where + T: Eq + Hash, + { + const { Self::ASSERT_T_IS_NOT_DROP }; + + let iter = iter.into_iter(); + + // Use the iterator's lower size bound. + // This follows `hashbrown::HashSet`'s `from_iter` implementation. + // + // This is a trade-off: + // * Negative: If lower bound is too low, the `HashSet` may have to grow and reallocate during `for_each` loop. + // * Positive: Avoids potential large over-allocation for iterators where upper bound may be a large over-estimate + // e.g. filter iterators. + let capacity = iter.size_hint().0; + let set = FxHashSet::with_capacity_and_hasher_in(capacity, FxBuildHasher, allocator.bump()); + // Wrap in `ManuallyDrop` *before* calling `for_each`, so compiler doesn't insert unnecessary code + // to drop the `FxHashSet` in case of a panic in iterator's `next` method + let mut set = ManuallyDrop::new(set); + + iter.for_each(|v| { + set.insert(v); + }); + + Self(set) + } + + /// Calling this method produces a compile-time panic. + /// + /// This method would be unsound, because [`HashSet`] is `Sync`, and the underlying allocator + /// (`bumpalo::Bump`) is not `Sync`. + /// + /// This method exists only to block access as much as possible to the underlying + /// `hashbrown::HashSet::allocator` method. That method can still be accessed via explicit `Deref` + /// (`hash_set.deref().allocator()`), but that's unsound. + /// + /// We'll prevent access to it completely and remove this method as soon as we can. + // TODO: Do that! + #[expect(clippy::unused_self)] + pub fn allocator(&self) -> &'alloc Bump { + const { panic!("This method cannot be called") }; + unreachable!(); + } +} + +// Provide access to all `hashbrown::HashSet`'s methods via deref +impl<'alloc, T> Deref for HashSet<'alloc, T> { + type Target = FxHashSet<'alloc, T>; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'alloc, T> DerefMut for HashSet<'alloc, T> { + #[inline] + fn deref_mut(&mut self) -> &mut FxHashSet<'alloc, T> { + &mut self.0 + } +} + +impl<'alloc, T> IntoIterator for HashSet<'alloc, T> { + type IntoIter = IntoIter; + type Item = T; + + /// Creates a consuming iterator, that is, one that moves each value out of the set + /// in arbitrary order. + /// + /// The set cannot be used after calling this. + #[inline(always)] + fn into_iter(self) -> Self::IntoIter { + let inner = ManuallyDrop::into_inner(self.0); + // TODO: `hashbrown::hash_set::IntoIter` is `Drop`. + // Wrap it in `ManuallyDrop` to prevent that. + inner.into_iter() + } +} + +impl<'alloc, 'i, T> IntoIterator for &'i HashSet<'alloc, T> { + type IntoIter = <&'i FxHashSet<'alloc, T> as IntoIterator>::IntoIter; + type Item = &'i T; + + /// Creates an iterator over the values of a `HashSet` in arbitrary order. + /// + /// The iterator element type is `&'a T`. + /// + /// Return the same [`Iter`] struct as by the `iter` method on [`HashSet`]. + #[inline(always)] + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +impl PartialEq for HashSet<'_, T> +where + T: Eq + Hash, +{ + #[inline(always)] + fn eq(&self, other: &Self) -> bool { + self.0.eq(&other.0) + } +} + +impl Eq for HashSet<'_, T> where T: Eq + Hash {} + +// Note: `Index` and `Extend` are implemented via `Deref` + +/// Convert `HashMap` to `HashSet`. +/// +/// This conversion is zero cost, as `HashSet` is just a wrapper around `HashMap`. +impl<'alloc, T> From> for HashSet<'alloc, T> { + #[inline(always)] + fn from(map: HashMap<'alloc, T, ()>) -> Self { + let inner_map = ManuallyDrop::into_inner(map.0); + let inner_set = hashbrown::HashSet::from(inner_map); + Self(ManuallyDrop::new(inner_set)) + } +} diff --git a/crates/oxc_allocator/src/lib.rs b/crates/oxc_allocator/src/lib.rs index dd1dad1743e82..94a2cde892302 100644 --- a/crates/oxc_allocator/src/lib.rs +++ b/crates/oxc_allocator/src/lib.rs @@ -9,6 +9,7 @@ //! * [`Vec`] //! * [`String`] //! * [`HashMap`] +//! * [`HashSet`] //! //! See [`Allocator`] docs for information on efficient use of [`Allocator`]. //! @@ -50,6 +51,7 @@ mod convert; #[cfg(feature = "from_raw_parts")] mod from_raw_parts; pub mod hash_map; +pub mod hash_set; #[cfg(feature = "pool")] mod pool; mod string_builder; @@ -68,6 +70,7 @@ pub use boxed::Box; pub use clone_in::CloneIn; pub use convert::{FromIn, IntoIn}; pub use hash_map::HashMap; +pub use hash_set::HashSet; #[cfg(feature = "pool")] pub use pool::*; pub use string_builder::StringBuilder;