Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,9 @@ outputs, and feasible to derive the secret values from indirect observation of
hashes, such as through timing attacks or hash table iteration. Once an attacker
knows the secret values, they can once again create infinite hash collisions
with ease.


## Acknowledgements

We thank Liam Gray for their suggestions on improving string hashing
performance.
74 changes: 19 additions & 55 deletions src/fast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,31 @@
use core::hash::{BuildHasher, Hasher};

use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed};
use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3};
use crate::{folded_multiply, hash_bytes_long, hash_bytes_short, rotate_right, ARBITRARY3};

/// A [`Hasher`] instance implementing foldhash, optimized for speed.
///
/// While you can create one directly with [`FoldHasher::with_seed`], you
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
/// [`FixedState`] to create [`FoldHasher`]s.
#[derive(Clone)]
pub struct FoldHasher {
pub struct FoldHasher<'a> {
accumulator: u64,
sponge: u128,
sponge_len: u8,
fold_seed: u64,
expand_seed: u64,
expand_seed2: u64,
expand_seed3: u64,
seeds: &'a [u64; 4],
}

impl FoldHasher {
impl<'a> FoldHasher<'a> {
/// Initializes this [`FoldHasher`] with the given per-hasher seed and
/// [`SharedSeed`].
#[inline]
pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
FoldHasher {
accumulator: per_hasher_seed,
sponge: 0,
sponge_len: 0,
fold_seed: shared_seed.seeds[0],
expand_seed: shared_seed.seeds[1],
expand_seed2: shared_seed.seeds[2],
expand_seed3: shared_seed.seeds[3],
seeds: &shared_seed.seeds,
}
}

Expand All @@ -43,7 +37,7 @@ impl FoldHasher {
if self.sponge_len as usize + bits > 128 {
let lo = self.sponge as u64;
let hi = (self.sponge >> 64) as u64;
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
self.sponge = x.into();
self.sponge_len = bits as u8;
} else {
Expand All @@ -53,7 +47,7 @@ impl FoldHasher {
}
}

impl Hasher for FoldHasher {
impl<'a> Hasher for FoldHasher<'a> {
#[inline(always)]
fn write(&mut self, bytes: &[u8]) {
// We perform overlapping reads in the byte hash which could lead to
Expand All @@ -62,41 +56,11 @@ impl Hasher for FoldHasher {
// which costs only a single cycle (or none if executed with
// instruction-level parallelism).
let len = bytes.len();
let base_seed = rotate_right(self.accumulator, len as u32);
self.accumulator = rotate_right(self.accumulator, len as u32);
if len <= 16 {
let mut s0 = base_seed;
let mut s1 = self.expand_seed;
// XOR the input into s0, s1, then multiply and fold.
if len >= 8 {
s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap());
s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap());
} else if len >= 4 {
s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64;
s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
} else if len > 0 {
let lo = bytes[0];
let mid = bytes[len / 2];
let hi = bytes[len - 1];
s0 ^= lo as u64;
s1 ^= ((hi as u64) << 8) | mid as u64;
}
self.accumulator = folded_multiply(s0, s1);
} else if len < 256 {
self.accumulator = hash_bytes_medium(
bytes,
base_seed,
base_seed.wrapping_add(self.expand_seed),
self.fold_seed,
);
self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds);
} else {
self.accumulator = hash_bytes_long(
bytes,
base_seed,
base_seed.wrapping_add(self.expand_seed),
base_seed.wrapping_add(self.expand_seed2),
base_seed.wrapping_add(self.expand_seed3),
self.fold_seed,
);
self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds);
}
}

Expand Down Expand Up @@ -124,7 +88,7 @@ impl Hasher for FoldHasher {
fn write_u128(&mut self, i: u128) {
let lo = i as u64;
let hi = (i >> 64) as u64;
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed);
self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]);
}

#[inline(always)]
Expand All @@ -141,7 +105,7 @@ impl Hasher for FoldHasher {
if self.sponge_len > 0 {
let lo = self.sponge as u64;
let hi = (self.sponge >> 64) as u64;
folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed)
folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0])
} else {
self.accumulator
}
Expand All @@ -166,10 +130,10 @@ impl Default for RandomState {
}

impl BuildHasher for RandomState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
}
}
Expand Down Expand Up @@ -224,10 +188,10 @@ impl SeedableRandomState {
}

impl BuildHasher for SeedableRandomState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed)
}
}
Expand Down Expand Up @@ -261,10 +225,10 @@ impl Default for FixedState {
}

impl BuildHasher for FixedState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed())
}
}
101 changes: 60 additions & 41 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,64 @@ const fn rotate_right(x: u64, r: u32) -> u64 {
}
}

/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) -> u64 {
/// Hashes strings <= 16 bytes, has unspecified behavior when bytes.len() > 16.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the comment be:

Hashes strings <= 16 bytes, has unspecified behavior when bytes.len() > 16.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be, yes.

#[inline(always)]
fn hash_bytes_short(bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
    // Select a (head, tail) pair of words read from the front and the back of
    // the input; the two reads are allowed to overlap. An empty input yields
    // (0, 0), which leaves both lanes unchanged by the XOR below.
    let n = bytes.len();
    let (head, tail) = if n >= 8 {
        // 8+ bytes: first and last native-endian 64-bit words.
        (
            u64::from_ne_bytes(bytes[..8].try_into().unwrap()),
            u64::from_ne_bytes(bytes[n - 8..].try_into().unwrap()),
        )
    } else if n >= 4 {
        // 4..=7 bytes: first and last native-endian 32-bit words, zero-extended.
        (
            u64::from(u32::from_ne_bytes(bytes[..4].try_into().unwrap())),
            u64::from(u32::from_ne_bytes(bytes[n - 4..].try_into().unwrap())),
        )
    } else if n > 0 {
        // 1..=3 bytes: first byte in one lane, last and middle byte packed
        // into the other.
        let first = u64::from(bytes[0]);
        let middle = u64::from(bytes[n / 2]);
        let last = u64::from(bytes[n - 1]);
        (first, (last << 8) | middle)
    } else {
        (0, 0)
    };

    // Lane 0 starts from the accumulator, lane 1 from seeds[1]; XOR the input
    // words in, then multiply and fold.
    folded_multiply(accumulator ^ head, seeds[1] ^ tail)
}

/// Hashes strings > 16 bytes, has unspecified behavior when bytes.len() <= 16.
#[cold]
#[inline(never)]
fn hash_bytes_long(mut bytes: &[u8], accumulator: u64, seeds: &[u64; 4]) -> u64 {
let mut s0 = accumulator;
let mut s1 = s0.wrapping_add(seeds[1]);
if bytes.len() >= 256 {
let mut s2 = s0.wrapping_add(seeds[2]);
let mut s3 = s0.wrapping_add(seeds[3]);
let chunks = bytes.chunks_exact(64);
let remainder = chunks.remainder().len();
for chunk in chunks {
let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
s0 = folded_multiply(a ^ s0, e ^ seeds[0]);
s1 = folded_multiply(b ^ s1, f ^ seeds[0]);
s2 = folded_multiply(c ^ s2, g ^ seeds[0]);
s3 = folded_multiply(d ^ s3, h ^ seeds[0]);
}
s0 ^= s2;
s1 ^= s3;

if remainder > 0 {
bytes = &bytes[bytes.len() - remainder.max(16)..];
} else {
return s0 ^ s1;
}
}

// Process 32 bytes per iteration, 16 bytes from the start, 16 bytes from
// the end. On the last iteration these two chunks can overlap, but that is
// perfectly fine.
Expand All @@ -239,46 +295,9 @@ fn hash_bytes_medium(bytes: &[u8], mut s0: u64, mut s1: u64, fold_seed: u64) ->
let b = u64::from_ne_bytes(lo[8..16].try_into().unwrap());
let c = u64::from_ne_bytes(hi[0..8].try_into().unwrap());
let d = u64::from_ne_bytes(hi[8..16].try_into().unwrap());
s0 = folded_multiply(a ^ s0, c ^ fold_seed);
s1 = folded_multiply(b ^ s1, d ^ fold_seed);
s0 = folded_multiply(a ^ s0, c ^ seeds[0]);
s1 = folded_multiply(b ^ s1, d ^ seeds[0]);
}

s0 ^ s1
}

/// Hashes strings >= 16 bytes, has unspecified behavior when bytes.len() < 16.
///
/// The input is consumed in 64-byte chunks. Each chunk is read as eight
/// native-endian `u64` words which are mixed into the four lanes `s0..s3`
/// (initial values supplied by the caller) using `folded_multiply`, with
/// `fold_seed` XORed into the second operand of every multiplication. After
/// the loop the four lanes are collapsed into two, and any bytes left over
/// after the last full chunk are handed to `hash_bytes_medium` together with
/// the two remaining lanes.
#[cold]
#[inline(never)]
fn hash_bytes_long(
bytes: &[u8],
mut s0: u64,
mut s1: u64,
mut s2: u64,
mut s3: u64,
fold_seed: u64,
) -> u64 {
let chunks = bytes.chunks_exact(64);
// Number of trailing bytes that do not fill a whole 64-byte chunk.
let remainder = chunks.remainder().len();
for chunk in chunks {
// Split the chunk into eight native-endian 64-bit words.
let a = u64::from_ne_bytes(chunk[0..8].try_into().unwrap());
let b = u64::from_ne_bytes(chunk[8..16].try_into().unwrap());
let c = u64::from_ne_bytes(chunk[16..24].try_into().unwrap());
let d = u64::from_ne_bytes(chunk[24..32].try_into().unwrap());
let e = u64::from_ne_bytes(chunk[32..40].try_into().unwrap());
let f = u64::from_ne_bytes(chunk[40..48].try_into().unwrap());
let g = u64::from_ne_bytes(chunk[48..56].try_into().unwrap());
let h = u64::from_ne_bytes(chunk[56..64].try_into().unwrap());
// Each lane absorbs one word from the first half of the chunk and one
// word from the second half; the lanes do not depend on each other.
s0 = folded_multiply(a ^ s0, e ^ fold_seed);
s1 = folded_multiply(b ^ s1, f ^ fold_seed);
s2 = folded_multiply(c ^ s2, g ^ fold_seed);
s3 = folded_multiply(d ^ s3, h ^ fold_seed);
}
// Collapse the four lanes into two.
s0 ^= s2;
s1 ^= s3;

if remainder > 0 {
// Hash the tail. At least the final 16 bytes are passed on, so when fewer
// than 16 bytes remain the slice reaches back into bytes that were already
// consumed by the chunk loop. The subtraction relies on
// bytes.len() >= 16, matching the precondition stated above.
hash_bytes_medium(&bytes[bytes.len() - remainder.max(16)..], s0, s1, fold_seed)
} else {
// Input length was an exact multiple of 64: combine the two lanes directly.
s0 ^ s1
}
}
22 changes: 11 additions & 11 deletions src/quality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ use crate::{fast, folded_multiply, ARBITRARY0, ARBITRARY8};
/// most likely want to use [`RandomState`], [`SeedableRandomState`] or
/// [`FixedState`] to create [`FoldHasher`]s.
#[derive(Clone)]
pub struct FoldHasher {
pub(crate) inner: fast::FoldHasher,
pub struct FoldHasher<'a> {
pub(crate) inner: fast::FoldHasher<'a>,
}

impl FoldHasher {
impl<'a> FoldHasher<'a> {
Copy link
Contributor

@hoxxep hoxxep Aug 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I quite like how adding the lifetime parameter turned out, I expected it to be more of a pain in various ways. Slightly regret not taking this option in rapidhash now!

/// Initializes this [`FoldHasher`] with the given per-hasher seed and
/// [`SharedSeed`].
#[inline(always)]
pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher {
pub fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> {
FoldHasher {
inner: fast::FoldHasher::with_seed(per_hasher_seed, shared_seed),
}
}
}

impl Hasher for FoldHasher {
impl<'a> Hasher for FoldHasher<'a> {
#[inline(always)]
fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes);
Expand Down Expand Up @@ -76,10 +76,10 @@ pub struct RandomState {
}

impl BuildHasher for RandomState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher {
inner: self.inner.build_hasher(),
}
Expand Down Expand Up @@ -130,10 +130,10 @@ impl SeedableRandomState {
}

impl BuildHasher for SeedableRandomState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher {
inner: self.inner.build_hasher(),
}
Expand Down Expand Up @@ -163,10 +163,10 @@ impl FixedState {
}

impl BuildHasher for FixedState {
type Hasher = FoldHasher;
type Hasher = FoldHasher<'static>;

#[inline(always)]
fn build_hasher(&self) -> FoldHasher {
fn build_hasher(&self) -> FoldHasher<'static> {
FoldHasher {
inner: self.inner.build_hasher(),
}
Expand Down