Commit 17a551d

Merge pull request #1435 from trapexit/rapidhash
Replace usage of wyhash with rapidhash
2 parents 764bd8f + f74dc36 commit 17a551d
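
At each call site the change is mechanical: wyhash took the buffer, its length, an explicit seed, and a pointer to its secret table, while rapidhash's default entry point takes only the buffer and length and falls back to its built-in seed and secret. A minimal sketch of the new call shape, assuming rapidhash.h is on the include path (the data below is illustrative, not mergerfs code):

// Illustrative sketch of the call-site shape after this commit.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "rapidhash.h"

int main()
{
  const char *data = "branch/path/to/file";
  size_t      len  = std::strlen(data);

  // Previously the equivalent call carried a seed and secret explicitly,
  // e.g. wyhash(data, len, seed, _wyp); rapidhash needs only buffer + length.
  uint64_t h = rapidhash(data, len);

  std::printf("%016llx\n", (unsigned long long)h);
  return 0;
}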

File tree

6 files changed: +342 -310 lines changed

DEPENDENCIES
+1 -1

@@ -3,7 +3,7 @@
 ### included in repo
 
 * libfuse: https://github.com/libfuse/libfuse (heavily modified fork of v2.x)
-* wyhash: https://github.com/wangyi-fudan/wyhash
+* rapidhash: https://github.com/Nicoshev/rapidhash
 * ghc::filesystem: https://github.com/gulrak/filesystem
 * nonstd::optional: https://github.com/martinmoene/optional-lite
 * fmt: https://github.com/fmtlib/fmt

src/fs_inode.cpp
+3 -9

@@ -19,7 +19,7 @@
 #include "ef.hpp"
 #include "errno.hpp"
 #include "fs_inode.hpp"
-#include "wyhash.h"
+#include "rapidhash.h"
 
 #include <cstdint>
 #include <string>
@@ -61,10 +61,7 @@ path_hash(const char *fusepath_,
           const dev_t dev_,
           const ino_t ino_)
 {
-  return wyhash(fusepath_,
-                fusepath_len_,
-                fs::inode::MAGIC,
-                _wyp);
+  return rapidhash(fusepath_,fusepath_len_);
 }
 
 static
@@ -99,10 +96,7 @@ devino_hash(const char *fusepath_,
   buf[0] = dev_;
   buf[1] = ino_;
 
-  return wyhash((void*)&buf[0],
-                sizeof(buf),
-                fs::inode::MAGIC,
-                _wyp);
+  return rapidhash((void*)&buf[0],sizeof(buf));
 }
 
 static
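
The devino_hash change above packs the device and inode numbers into a two-element buffer and hashes its raw bytes. A standalone sketch of that pattern, assuming rapidhash.h is available (the function name and values are illustrative, not mergerfs code):

// Illustrative sketch of the devino hashing pattern shown in the diff above.
#include <cstdint>
#include <cstdio>
#include <sys/types.h>   // dev_t, ino_t
#include "rapidhash.h"

static
uint64_t
devino_hash_sketch(const dev_t dev_,
                   const ino_t ino_)
{
  uint64_t buf[2];

  buf[0] = dev_;
  buf[1] = ino_;

  // Hash the 16 raw bytes; the seed and secret now come from rapidhash's
  // defaults rather than being passed in as with wyhash.
  return rapidhash((void*)&buf[0], sizeof(buf));
}

int main()
{
  std::printf("%016llx\n",
              (unsigned long long)devino_hash_sketch((dev_t)2049, (ino_t)131072));
  return 0;
}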

src/hashset.hpp
+2 -2

@@ -19,7 +19,7 @@
 #pragma once
 
 #include "khash.h"
-#include "wyhash.h"
+#include "rapidhash.h"
 
 KHASH_SET_INIT_INT64(hashset);
 
@@ -45,7 +45,7 @@ class HashSet
     uint64_t h;
     khint_t key;
 
-    h = wyhash(str_,len_,0x7472617065786974,_wyp);
+    h = rapidhash(str_,len_);
 
     key = kh_put(hashset,_set,h,&rv);
     if(rv == 0)
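
In HashSet::put above, the string is hashed with rapidhash and the 64-bit result is used as the key of a khash int64 set; kh_put reporting rv == 0 means that hash was already present. A self-contained sketch of the same pattern, assuming khash.h (klib) and rapidhash.h are on the include path (the set name and strings are hypothetical):

// Illustrative sketch of the hash-then-insert pattern used by HashSet::put.
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "khash.h"
#include "rapidhash.h"

KHASH_SET_INIT_INT64(sketchset);   // hypothetical set name, not mergerfs's

int main()
{
  khash_t(sketchset) *set = kh_init(sketchset);
  const char *strs[] = {"foo", "bar", "foo"};
  int dupes = 0;

  for(const char *s : strs)
    {
      int rv;
      uint64_t h = rapidhash(s, std::strlen(s));

      kh_put(sketchset, set, h, &rv);
      if(rv == 0)   // key (the hash) was already in the set
        dupes++;
    }

  std::printf("unique=%u dupes=%d\n", (unsigned)kh_size(set), dupes);

  kh_destroy(sketchset, set);
  return 0;
}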

src/rapidhash.h
+323 (new file)

@@ -0,0 +1,323 @@
/*
 * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
 * Copyright (C) 2024 Nicolas De Carli
 *
 * Based on 'wyhash', by Wang Yi <[email protected]>
 *
 * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following disclaimer
 *      in the documentation and/or other materials provided with the
 *      distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at:
 *   - rapidhash source repository: https://github.com/Nicoshev/rapidhash
 */

/*
 * Includes.
 */
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER)
  #include <intrin.h>
  #if defined(_M_X64) && !defined(_M_ARM64EC)
    #pragma intrinsic(_umul128)
  #endif
#endif

/*
 * C++ macros.
 *
 * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)).
 */
#ifdef __cplusplus
  #define RAPIDHASH_NOEXCEPT noexcept
  #define RAPIDHASH_CONSTEXPR constexpr
  #ifndef RAPIDHASH_INLINE
    #define RAPIDHASH_INLINE inline
  #endif
#else
  #define RAPIDHASH_NOEXCEPT
  #define RAPIDHASH_CONSTEXPR static const
  #ifndef RAPIDHASH_INLINE
    #define RAPIDHASH_INLINE static inline
  #endif
#endif

/*
 * Protection macro, alters behaviour of rapid_mum multiplication function.
 *
 * RAPIDHASH_FAST: Normal behavior, max speed.
 * RAPIDHASH_PROTECTED: Extra protection against entropy loss.
 */
#ifndef RAPIDHASH_PROTECTED
  #define RAPIDHASH_FAST
#elif defined(RAPIDHASH_FAST)
  #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
#endif

/*
 * Unrolling macros, changes code definition for main hash function.
 *
 * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes.
 * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes.
 *
 * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED.
 *
 * These macros do not alter the output hash.
 */
#ifndef RAPIDHASH_COMPACT
  #define RAPIDHASH_UNROLLED
#elif defined(RAPIDHASH_UNROLLED)
  #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
#endif

/*
 * Likely and unlikely macros.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
  #define _likely_(x) __builtin_expect(x,1)
  #define _unlikely_(x) __builtin_expect(x,0)
#else
  #define _likely_(x) (x)
  #define _unlikely_(x) (x)
#endif

/*
 * Endianness macros.
 */
#ifndef RAPIDHASH_LITTLE_ENDIAN
  #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    #define RAPIDHASH_LITTLE_ENDIAN
  #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    #define RAPIDHASH_BIG_ENDIAN
  #else
    #warning "could not determine endianness! Falling back to little endian."
    #define RAPIDHASH_LITTLE_ENDIAN
  #endif
#endif

/*
 * Default seed.
 */
#define RAPID_SEED (0xbdd89aa982704029ull)

/*
 * Default secret parameters.
 */
RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull};

/*
 * 64*64 -> 128bit multiply function.
 *
 * @param A Address of 64-bit number.
 * @param B Address of 64-bit number.
 *
 * Calculates 128-bit C = *A * *B.
 *
 * When RAPIDHASH_FAST is defined:
 * Overwrites A contents with C's low 64 bits.
 * Overwrites B contents with C's high 64 bits.
 *
 * When RAPIDHASH_PROTECTED is defined:
 * Xors and overwrites A contents with C's low 64 bits.
 * Xors and overwrites B contents with C's high 64 bits.
 */
RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT {
#if defined(__SIZEOF_INT128__)
  __uint128_t r=*A; r*=*B;
  #ifdef RAPIDHASH_PROTECTED
  *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
  #else
  *A=(uint64_t)r; *B=(uint64_t)(r>>64);
  #endif
#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
  #if defined(_M_X64)
    #ifdef RAPIDHASH_PROTECTED
    uint64_t a, b;
    a=_umul128(*A,*B,&b);
    *A^=a; *B^=b;
    #else
    *A=_umul128(*A,*B,B);
    #endif
  #else
    #ifdef RAPIDHASH_PROTECTED
    uint64_t a, b;
    b = __umulh(*A, *B);
    a = *A * *B;
    *A^=a; *B^=b;
    #else
    uint64_t c = __umulh(*A, *B);
    *A = *A * *B;
    *B = c;
    #endif
  #endif
#else
  uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
  uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
  lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
  #ifdef RAPIDHASH_PROTECTED
  *A^=lo; *B^=hi;
  #else
  *A=lo; *B=hi;
  #endif
#endif
}

/*
 * Multiply and xor mix function.
 *
 * @param A 64-bit number.
 * @param B 64-bit number.
 *
 * Calculates 128-bit C = A * B.
 * Returns 64-bit xor between high and low 64 bits of C.
 */
RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; }

/*
 * Read functions.
 */
#ifdef RAPIDHASH_LITTLE_ENDIAN
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;}
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;}
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);}
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);}
#elif defined(_MSC_VER)
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);}
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);}
#else
RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT {
  uint64_t v; memcpy(&v, p, 8);
  return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
}
RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT {
  uint32_t v; memcpy(&v, p, 4);
  return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
}
#endif

/*
 * Reads and combines 3 bytes of input.
 *
 * @param p Buffer to read from.
 * @param k Length of @p, in bytes.
 *
 * Always reads and combines 3 bytes from memory.
 * Guarantees to read each buffer position at least once.
 *
 * Returns a 64-bit value containing all three bytes read.
 */
RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];}

/*
 * rapidhash main function.
 *
 * @param key Buffer to be hashed.
 * @param len @key length, in bytes.
 * @param seed 64-bit seed used to alter the hash result predictably.
 * @param secret Triplet of 64-bit secrets used to alter hash result predictably.
 *
 * Returns a 64-bit hash.
 */
RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT {
  const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b;
  if(_likely_(len<=16)){
    if(_likely_(len>=4)){
      const uint8_t * plast = p + len - 4;
      a = (rapid_read32(p) << 32) | rapid_read32(plast);
      const uint64_t delta = ((len&24)>>(len>>3));
      b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); }
    else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;}
    else a=b=0;
  }
  else{
    size_t i=len;
    if(_unlikely_(i>48)){
      uint64_t see1=seed, see2=seed;
#ifdef RAPIDHASH_UNROLLED
      while(_likely_(i>=96)){
        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
        seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed);
        see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1);
        see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2);
        p+=96; i-=96;
      }
      if(_unlikely_(i>=48)){
        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
        p+=48; i-=48;
      }
#else
      do {
        seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
        see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
        see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
        p+=48; i-=48;
      } while (_likely_(i>=48));
#endif
      seed^=see1^see2;
    }
    if(i>16){
      seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]);
      if(i>32)
        seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed);
    }
    a=rapid_read64(p+i-16); b=rapid_read64(p+i-8);
  }
  a^=secret[1]; b^=seed; rapid_mum(&a,&b);
  return rapid_mix(a^secret[0]^len,b^secret[1]);
}

/*
 * rapidhash default seeded hash function.
 *
 * @param key Buffer to be hashed.
 * @param len @key length, in bytes.
 * @param seed 64-bit seed used to alter the hash result predictably.
 *
 * Calls rapidhash_internal using provided parameters and default secrets.
 *
 * Returns a 64-bit hash.
 */
RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT {
  return rapidhash_internal(key, len, seed, rapid_secret);
}

/*
 * rapidhash default hash function.
 *
 * @param key Buffer to be hashed.
 * @param len @key length, in bytes.
 *
 * Calls rapidhash_withSeed using provided parameters and the default seed.
 *
 * Returns a 64-bit hash.
 */
RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT {
  return rapidhash_withSeed(key, len, RAPID_SEED);
}
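
For reference, a small usage sketch of the three entry points this header defines; it is not part of the commit, and the message string and seed value are only illustrative:

// Usage sketch for the vendored header above (illustrative values).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "rapidhash.h"

int main()
{
  const char *msg = "hello rapidhash";
  size_t      len = std::strlen(msg);

  uint64_t h_default  = rapidhash(msg, len);                       // RAPID_SEED + rapid_secret
  uint64_t h_seeded   = rapidhash_withSeed(msg, len, 0x1234ull);   // caller-supplied seed
  uint64_t h_internal = rapidhash_internal(msg, len, RAPID_SEED, rapid_secret);

  // rapidhash() is rapidhash_withSeed(key, len, RAPID_SEED), so h_default
  // and h_internal agree; a different seed changes the result.
  std::printf("%016llx %016llx %016llx\n",
              (unsigned long long)h_default,
              (unsigned long long)h_seeded,
              (unsigned long long)h_internal);
  return 0;
}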
