Skip to content

Commit a69af93

Browse files
committed
Auto merge of #549 - the-mikedavis:md/hash-table-iter-hash, r=Amanieu
Add `HashTable::iter_hash`, `HashTable::iter_hash_mut`. This is a follow-up to #546 (see the review comment on #546). `iter_hash` from the old raw API can be useful for reading from a "bag" / "multi map" type that allows duplicate key-value pairs. Exposing it safely in `HashTable` takes a fairly small wrapper around `RawIterHash`. This PR partially reverts #546 to restore `RawTable::iter_hash` and its associated types.
2 parents aa1411b + 8b60594 commit a69af93

File tree

2 files changed

+273
-1
lines changed

2 files changed

+273
-1
lines changed

Diff for: src/raw/mod.rs

+130
Original file line numberDiff line numberDiff line change
@@ -1372,6 +1372,20 @@ impl<T, A: Allocator> RawTable<T, A> {
13721372
self.table.iter()
13731373
}
13741374

1375+
/// Returns an iterator over occupied buckets that could match a given hash.
1376+
///
1377+
/// `RawTable` only stores 7 bits of the hash value, so this iterator may
1378+
/// return items that have a hash value different than the one provided. You
1379+
/// should always validate the returned values before using them.
1380+
///
1381+
/// It is up to the caller to ensure that the `RawTable` outlives the
1382+
/// `RawIterHash`. Because we cannot make the `next` method unsafe on the
1383+
/// `RawIterHash` struct, we have to make the `iter_hash` method unsafe.
1384+
#[cfg_attr(feature = "inline-more", inline)]
1385+
pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<T> {
1386+
RawIterHash::new(self, hash)
1387+
}
1388+
13751389
/// Returns an iterator which removes all elements from the table without
13761390
/// freeing the memory.
13771391
#[cfg_attr(feature = "inline-more", inline)]
@@ -3996,6 +4010,122 @@ impl<T, A: Allocator> Iterator for RawDrain<'_, T, A> {
39964010
impl<T, A: Allocator> ExactSizeIterator for RawDrain<'_, T, A> {}
39974011
impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}
39984012

4013+
/// Iterator over occupied buckets that could match a given hash.
4014+
///
4015+
/// `RawTable` only stores 7 bits of the hash value, so this iterator may return
4016+
/// items that have a hash value different than the one provided. You should
4017+
/// always validate the returned values before using them.
4018+
///
4019+
/// For maximum flexibility this iterator is not bound by a lifetime, but you
4020+
/// must observe several rules when using it:
4021+
/// - You must not free the hash table while iterating (including via growing/shrinking).
4022+
/// - It is fine to erase a bucket that has been yielded by the iterator.
4023+
/// - Erasing a bucket that has not yet been yielded by the iterator may still
4024+
/// result in the iterator yielding that bucket.
4025+
/// - It is unspecified whether an element inserted after the iterator was
4026+
/// created will be yielded by that iterator.
4027+
/// - The order in which the iterator yields buckets is unspecified and may
4028+
/// change in the future.
4029+
pub struct RawIterHash<T> {
4030+
inner: RawIterHashInner,
4031+
_marker: PhantomData<T>,
4032+
}
4033+
4034+
struct RawIterHashInner {
4035+
// See `RawTableInner`'s corresponding fields for details.
4036+
// We can't store a `*const RawTableInner` as it would get
4037+
// invalidated by the user calling `&mut` methods on `RawTable`.
4038+
bucket_mask: usize,
4039+
ctrl: NonNull<u8>,
4040+
4041+
// The top 7 bits of the hash.
4042+
h2_hash: u8,
4043+
4044+
// The sequence of groups to probe in the search.
4045+
probe_seq: ProbeSeq,
4046+
4047+
group: Group,
4048+
4049+
// The elements within the group with a matching h2-hash.
4050+
bitmask: BitMaskIter,
4051+
}
4052+
4053+
impl<T> RawIterHash<T> {
4054+
#[cfg_attr(feature = "inline-more", inline)]
4055+
unsafe fn new<A: Allocator>(table: &RawTable<T, A>, hash: u64) -> Self {
4056+
RawIterHash {
4057+
inner: RawIterHashInner::new(&table.table, hash),
4058+
_marker: PhantomData,
4059+
}
4060+
}
4061+
}
4062+
4063+
impl RawIterHashInner {
4064+
#[cfg_attr(feature = "inline-more", inline)]
4065+
unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
4066+
let h2_hash = h2(hash);
4067+
let probe_seq = table.probe_seq(hash);
4068+
let group = Group::load(table.ctrl(probe_seq.pos));
4069+
let bitmask = group.match_byte(h2_hash).into_iter();
4070+
4071+
RawIterHashInner {
4072+
bucket_mask: table.bucket_mask,
4073+
ctrl: table.ctrl,
4074+
h2_hash,
4075+
probe_seq,
4076+
group,
4077+
bitmask,
4078+
}
4079+
}
4080+
}
4081+
4082+
impl<T> Iterator for RawIterHash<T> {
4083+
type Item = Bucket<T>;
4084+
4085+
fn next(&mut self) -> Option<Bucket<T>> {
4086+
unsafe {
4087+
match self.inner.next() {
4088+
Some(index) => {
4089+
// Can't use `RawTable::bucket` here as we don't have
4090+
// an actual `RawTable` reference to use.
4091+
debug_assert!(index <= self.inner.bucket_mask);
4092+
let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index);
4093+
Some(bucket)
4094+
}
4095+
None => None,
4096+
}
4097+
}
4098+
}
4099+
}
4100+
4101+
impl Iterator for RawIterHashInner {
4102+
type Item = usize;
4103+
4104+
fn next(&mut self) -> Option<Self::Item> {
4105+
unsafe {
4106+
loop {
4107+
if let Some(bit) = self.bitmask.next() {
4108+
let index = (self.probe_seq.pos + bit) & self.bucket_mask;
4109+
return Some(index);
4110+
}
4111+
if likely(self.group.match_empty().any_bit_set()) {
4112+
return None;
4113+
}
4114+
self.probe_seq.move_next(self.bucket_mask);
4115+
4116+
// Can't use `RawTableInner::ctrl` here as we don't have
4117+
// an actual `RawTableInner` reference to use.
4118+
let index = self.probe_seq.pos;
4119+
debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
4120+
let group_ctrl = self.ctrl.as_ptr().add(index);
4121+
4122+
self.group = Group::load(group_ctrl);
4123+
self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
4124+
}
4125+
}
4126+
}
4127+
}
4128+
39994129
pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
40004130
pub iter: RawIter<T>,
40014131
pub table: &'a mut RawTable<T, A>,

Diff for: src/table.rs

+143-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData};
33
use crate::{
44
raw::{
55
Allocator, Bucket, Global, InsertSlot, RawDrain, RawExtractIf, RawIntoIter, RawIter,
6-
RawTable,
6+
RawIterHash, RawTable,
77
},
88
TryReserveError,
99
};
@@ -741,6 +741,98 @@ where
741741
}
742742
}
743743

744+
/// An iterator visiting all elements which may match a hash.
745+
/// The iterator element type is `&'a T`.
746+
///
747+
/// This iterator may return elements from the table that have a hash value
748+
/// different than the one provided. You should always validate the returned
749+
/// values before using them.
750+
///
751+
/// # Examples
752+
///
753+
/// ```
754+
/// # #[cfg(feature = "nightly")]
755+
/// # fn test() {
756+
/// use hashbrown::{HashTable, DefaultHashBuilder};
757+
/// use std::hash::BuildHasher;
758+
///
759+
/// let mut table = HashTable::new();
760+
/// let hasher = DefaultHashBuilder::default();
761+
/// let hasher = |val: &_| hasher.hash_one(val);
762+
/// table.insert_unique(hasher(&"a"), "a", hasher);
763+
/// table.insert_unique(hasher(&"a"), "b", hasher);
764+
/// table.insert_unique(hasher(&"b"), "c", hasher);
765+
///
766+
/// // Will print "a" and "b" (and possibly "c") in an arbitrary order.
767+
/// for x in table.iter_hash(hasher(&"a")) {
768+
/// println!("{}", x);
769+
/// }
770+
/// # }
771+
/// # fn main() {
772+
/// # #[cfg(feature = "nightly")]
773+
/// # test()
774+
/// # }
775+
/// ```
776+
pub fn iter_hash(&self, hash: u64) -> IterHash<'_, T> {
777+
IterHash {
778+
inner: unsafe { self.raw.iter_hash(hash) },
779+
_marker: PhantomData,
780+
}
781+
}
782+
783+
/// A mutable iterator visiting all elements which may match a hash.
784+
/// The iterator element type is `&'a mut T`.
785+
///
786+
/// This iterator may return elements from the table that have a hash value
787+
/// different than the one provided. You should always validate the returned
788+
/// values before using them.
789+
///
790+
/// # Examples
791+
///
792+
/// ```
793+
/// # #[cfg(feature = "nightly")]
794+
/// # fn test() {
795+
/// use hashbrown::{HashTable, DefaultHashBuilder};
796+
/// use std::hash::BuildHasher;
797+
///
798+
/// let mut table = HashTable::new();
799+
/// let hasher = DefaultHashBuilder::default();
800+
/// let hasher = |val: &_| hasher.hash_one(val);
801+
/// table.insert_unique(hasher(&1), 2, hasher);
802+
/// table.insert_unique(hasher(&1), 3, hasher);
803+
/// table.insert_unique(hasher(&2), 5, hasher);
804+
///
805+
/// // Update matching values
806+
/// for val in table.iter_hash_mut(hasher(&1)) {
807+
/// *val *= 2;
808+
/// }
809+
///
810+
/// assert_eq!(table.len(), 3);
811+
/// let mut vec: Vec<i32> = Vec::new();
812+
///
813+
/// for val in &table {
814+
/// println!("val: {}", val);
815+
/// vec.push(*val);
816+
/// }
817+
///
818+
/// // The values will contain 4 and 6 and may contain either 5 or 10.
819+
/// assert!(vec.contains(&4));
820+
/// assert!(vec.contains(&6));
821+
///
822+
/// assert_eq!(table.len(), 3);
823+
/// # }
824+
/// # fn main() {
825+
/// # #[cfg(feature = "nightly")]
826+
/// # test()
827+
/// # }
828+
/// ```
829+
pub fn iter_hash_mut(&mut self, hash: u64) -> IterHashMut<'_, T> {
830+
IterHashMut {
831+
inner: unsafe { self.raw.iter_hash(hash) },
832+
_marker: PhantomData,
833+
}
834+
}
835+
744836
/// Retains only the elements specified by the predicate.
745837
///
746838
/// In other words, remove all elements `e` such that `f(&e)` returns `false`.
@@ -1932,6 +2024,56 @@ impl<T> ExactSizeIterator for IterMut<'_, T> {
19322024

19332025
impl<T> FusedIterator for IterMut<'_, T> {}
19342026

2027+
/// An iterator over the entries of a `HashTable` that could match a given hash.
2028+
/// The iterator element type is `&'a T`.
2029+
///
2030+
/// This `struct` is created by the [`iter_hash`] method on [`HashTable`]. See its
2031+
/// documentation for more.
2032+
///
2033+
/// [`iter_hash`]: struct.HashTable.html#method.iter_hash
2034+
/// [`HashTable`]: struct.HashTable.html
2035+
pub struct IterHash<'a, T> {
2036+
inner: RawIterHash<T>,
2037+
_marker: PhantomData<&'a T>,
2038+
}
2039+
2040+
impl<'a, T> Iterator for IterHash<'a, T> {
2041+
type Item = &'a T;
2042+
2043+
fn next(&mut self) -> Option<Self::Item> {
2044+
// Avoid `Option::map` because it bloats LLVM IR.
2045+
match self.inner.next() {
2046+
Some(bucket) => Some(unsafe { bucket.as_ref() }),
2047+
None => None,
2048+
}
2049+
}
2050+
}
2051+
2052+
/// A mutable iterator over the entries of a `HashTable` that could match a given hash.
2053+
/// The iterator element type is `&'a mut T`.
2054+
///
2055+
/// This `struct` is created by the [`iter_hash_mut`] method on [`HashTable`]. See its
2056+
/// documentation for more.
2057+
///
2058+
/// [`iter_hash_mut`]: struct.HashTable.html#method.iter_hash_mut
2059+
/// [`HashTable`]: struct.HashTable.html
2060+
pub struct IterHashMut<'a, T> {
2061+
inner: RawIterHash<T>,
2062+
_marker: PhantomData<&'a mut T>,
2063+
}
2064+
2065+
impl<'a, T> Iterator for IterHashMut<'a, T> {
2066+
type Item = &'a mut T;
2067+
2068+
fn next(&mut self) -> Option<Self::Item> {
2069+
// Avoid `Option::map` because it bloats LLVM IR.
2070+
match self.inner.next() {
2071+
Some(bucket) => Some(unsafe { bucket.as_mut() }),
2072+
None => None,
2073+
}
2074+
}
2075+
}
2076+
19352077
/// An owning iterator over the entries of a `HashTable` in arbitrary order.
19362078
/// The iterator element type is `T`.
19372079
///

0 commit comments

Comments
 (0)