orlp · orlp · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025 · knl
diff --git a/README.md b/README.md
@@ -275,3 +275,9 @@ outputs, and feasible to derive the secret values from indirect observation of
 hashes, such as through timing attacks or hash table iteration. Once an attacker
 knows the secret values, they can once again create infinite hash collisions
 with ease.
+
+
+## Acknowledgements
+
+We thank Liam Gray for their suggestions on improving string hashing
+performance.
diff --git a/src/fast.rs b/src/fast.rs
@@ -3,37 +3,31 @@
 use core::hash::{BuildHasher, Hasher};
 
 use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
-use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3};
+use crate::{folded_multiply, hash_bytes_long, hash_bytes_short, rotate_right, ARBITRARY3};
 
 /// A [`Hasher`] instance implementing foldhash, optimized for speed.
 ///
 /// While you can create one directly with [`FoldHasher::with_seed`], you
 /// most likely want to use [`RandomState`], [`SeedableRandomState`] or
 /// [`FixedState`] to create [`FoldHasher`]s.
 #[derive(Clone)]
-pub struct FoldHasher {
+pub struct FoldHasher<'a> {
     accumulator: u64,
     sponge: u128,
     sponge_len: u8,
-    fold_seed: u64,
-    expand_seed: u64,
-    expand_seed2: u64,
-    expand_seed3: u64,
+    seeds: &'a [u64; 4],
 }
 
-impl FoldHasher {
+impl<'a> FoldHasher<'a> {
     /// Initializes this [`FoldHasher`] with the given per-hasher seed and
     /// [`SharedSeed`].
     #[inline]
-    pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
+    pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
         FoldHasher {
             accumulator: per_hasher_seed,
             sponge: 0,
             sponge_len: 0,
-            fold_seed: shared_seed.seeds[0],
-            expand_seed: shared_seed.seeds[1],
-            expand_seed2: shared_seed.seeds[2],
-            expand_seed3: shared_seed.seeds[3],
+            seeds: &shared_seed.seeds,
         }
     }
 
@@ -43,7 +37,7 @@ impl FoldHasher {
         if self.sponge_len as usize + bits > 128 {
             let lo = self.sponge as u64;
             let hi = (self.sponge >> 64) as u64;
-            self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
+            self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
             self.sponge = x.into();
             self.sponge_len = bits as u8;
         } else {
@@ -53,7 +47,7 @@ impl FoldHasher {
     }
 }
 
-impl Hasher for FoldHasher {
+impl<'a> Hasher for FoldHasher<'a> {
     #[inline(always)]
     fn write(&mut self, bytes: &[u8]) {
         // We perform overlapping reads in the byte hash which could lead to
@@ -62,41 +56,11 @@ impl Hasher for FoldHasher {
         // which costs only a single cycle (or none if executed with
         // instruction-level parallelism).
         let len = bytes.len();
-        let base_seed = rotate_right(self.accumulator, len as u32);
+        self.accumulator = rotate_right(self.accumulator, len as u32);
         if len <= 16 {
-            let mut s0 = base_seed;
-            let mut s1 = self.expand_seed;
-            // XOR the input into s0, s1, then multiply and fold.
-            if len >= 8 {
-                s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
-                s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
-            } else if len >= 4 {
-                s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
-                s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
-            } else if len > 0 {
-                let lo = bytes[0];
-                let mid = bytes[len / 2];
-                let hi = bytes[len - 1];
-                s0 ^= lo as u64;
-                s1 ^= ((hi as u64) << 8) | mid as u64;
-            }
-            self.accumulator = folded_multiply(s0, s1);
-        } else if len < 256 {
-            self.accumulator = hash_bytes_medium(
-                bytes,
-                base_seed,
-                base_seed.wrapping_add(self.expand_seed),
-                self.fold_seed,
-            );
+            self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds);
         } else {
-            self.accumulator = hash_bytes_long(
-                bytes,
-                base_seed,
-                base_seed.wrapping_add(self.expand_seed),
-                base_seed.wrapping_add(self.expand_seed2),
-                base_seed.wrapping_add(self.expand_seed3),
-                self.fold_seed,
-            );
+            self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds);
         }
     }
 
@@ -124,7 +88,7 @@ impl Hasher for FoldHasher {
     fn write_u128(&mut self, i: u128) {
         let lo = i as u64;
         let hi = (i >> 64) as u64;
-        self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
+        self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
     }
 
     #[inline(always)]
@@ -141,7 +105,7 @@ impl Hasher for FoldHasher {
         if self.sponge_len > 0 {
             let lo = self.sponge as u64;
             let hi = (self.sponge >> 64) as u64;
-            folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
+            folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0])
         } else {
             self.accumulator
         }
@@ -166,10 +130,10 @@ impl Default for RandomState {
 }
 
 impl BuildHasher for RandomState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
     }
 }
@@ -224,10 +188,10 @@ impl SeedableRandomState {
 }
 
 impl BuildHasher for SeedableRandomState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
     }
 }
@@ -261,10 +225,10 @@ impl Default for FixedState {
 }
 
 impl BuildHasher for FixedState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
@@ -220,8 +220,64 @@ const fn rotate_right(x: u64, r: u32) -> u64 {
     }
 }
 
-/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
-fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) -> u64 {
+/// Hashes strings <= 16 bytes, has unspecified behavior when bytes.len() > 16.
+#[inline(always)]
+fn hash_bytes_short(bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
+    let len = bytes.len();
+    let mut s0 = accumulator;
+    let mut s1 = seeds[1];
+    // XOR the input into s0, s1, then multiply and fold.
+    if len >= 8 {
+        s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
+        s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
+    } else if len >= 4 {
+        s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
+        s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
+    } else if len > 0 {
+        let lo = bytes[0];
+        let mid = bytes[len / 2];
+        let hi = bytes[len - 1];
+        s0 ^= lo as u64;
+        s1 ^= ((hi as u64) << 8) | mid as u64;
+    }
+    folded_multiply(s0, s1)
+}
+
+/// Hashes strings > 16 bytes, has unspecified behavior when bytes.len() <= 16.
+#[cold]
+#[inline(never)]
+fn hash_bytes_long(mut bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
+    let mut s0 = accumulator;
+    let mut s1 = s0.wrapping_add(seeds[1]);
+    if bytes.len() >= 256 {
+        let mut s2 = s0.wrapping_add(seeds[2]);
+        let mut s3 = s0.wrapping_add(seeds[3]);
+        let chunks = bytes.chunks_exact(64);
+        let remainder = chunks.remainder().len();
+        for chunk in chunks {
+            let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
+            let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
+            let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
+            let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
+            let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
+            let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
+            let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
+            let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
+            s0 = folded_multiply(a ^ s0, e ^ seeds[0]);
+            s1 = folded_multiply(b ^ s1, f ^ seeds[0]);
+            s2 = folded_multiply(c ^ s2, g ^ seeds[0]);
+            s3 = folded_multiply(d ^ s3, h ^ seeds[0]);
+        }
+        s0 ^= s2;
+        s1 ^= s3;
+
+        if remainder > 0 {
+            bytes = &bytes[bytes.len() - remainder.max(16)..];
+        } else {
+            return s0 ^ s1;
+        }
+    }
+
     // Process 32 bytes per iteration, 16 bytes from the start, 16 bytes from
     // the end. On the last iteration these two chunks can overlap, but that is
     // perfectly fine.
@@ -239,46 +295,9 @@ fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) ->
         let b = u64::from_ne_bytes(lo[8..16].try_into().unwrap());
         let c = u64::from_ne_bytes(hi[0..8].try_into().unwrap());
         let d = u64::from_ne_bytes(hi[8..16].try_into().unwrap());
-        s0 = folded_multiply(a ^ s0, c ^ fold_seed);
-        s1 = folded_multiply(b ^ s1, d ^ fold_seed);
+        s0 = folded_multiply(a ^ s0, c ^ seeds[0]);
+        s1 = folded_multiply(b ^ s1, d ^ seeds[0]);
     }
 
     s0 ^ s1
 }
-
-/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
-#[cold]
-#[inline(never)]
-fn hash_bytes_long(
-    bytes: &[u8],
-    mut s0: u64,
-    mut s1: u64,
-    mut s2: u64,
-    mut s3: u64,
-    fold_seed: u64,
-) -> u64 {
-    let chunks = bytes.chunks_exact(64);
-    let remainder = chunks.remainder().len();
-    for chunk in chunks {
-        let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
-        let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
-        let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
-        let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
-        let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
-        let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
-        let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
-        let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
-        s0 = folded_multiply(a ^ s0, e ^ fold_seed);
-        s1 = folded_multiply(b ^ s1, f ^ fold_seed);
-        s2 = folded_multiply(c ^ s2, g ^ fold_seed);
-        s3 = folded_multiply(d ^ s3, h ^ fold_seed);
-    }
-    s0 ^= s2;
-    s1 ^= s3;
-
-    if remainder > 0 {
-        hash_bytes_medium(&bytes[bytes.len() - remainder.max(16)..], s0, s1, fold_seed)
-    } else {
-        s0 ^ s1
-    }
-}
diff --git a/src/quality.rs b/src/quality.rs
@@ -12,22 +12,22 @@ use crate::{fast, folded_multiply, ARBITRARY0, ARBITRARY8};
 /// most likely want to use [`RandomState`], [`SeedableRandomState`] or
 /// [`FixedState`] to create [`FoldHasher`]s.
 #[derive(Clone)]
-pub struct FoldHasher {
-    pub(crate) inner: fast::FoldHasher,
+pub struct FoldHasher<'a> {
+    pub(crate) inner: fast::FoldHasher<'a>,
 }
 
-impl FoldHasher {
+impl<'a> FoldHasher<'a> {
     /// Initializes this [`FoldHasher`] with the given per-hasher seed and
     /// [`SharedSeed`].
     #[inline(always)]
-    pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
+    pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
         FoldHasher {
             inner: fast::FoldHasher::with_seed(per_hasher_seed, shared_seed),
         }
     }
 }
 
-impl Hasher for FoldHasher {
+impl<'a> Hasher for FoldHasher<'a> {
     #[inline(always)]
     fn write(&mut self, bytes: &[u8]) {
         self.inner.write(bytes);
@@ -76,10 +76,10 @@ pub struct RandomState {
 }
 
 impl BuildHasher for RandomState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher {
             inner: self.inner.build_hasher(),
         }
@@ -130,10 +130,10 @@ impl SeedableRandomState {
 }
 
 impl BuildHasher for SeedableRandomState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher {
             inner: self.inner.build_hasher(),
         }
@@ -163,10 +163,10 @@ impl FixedState {
 }
 
 impl BuildHasher for FixedState {
-    type Hasher = FoldHasher;
+    type Hasher = FoldHasher<'static>;
 
     #[inline(always)]
-    fn build_hasher(&self) -> FoldHasher {
+    fn build_hasher(&self) -> FoldHasher<'static> {
         FoldHasher {
             inner: self.inner.build_hasher(),
         }