|
| 1 | +/* |
| 2 | + * rapidhash - Very fast, high quality, platform-independent hashing algorithm. |
| 3 | + * Copyright (C) 2024 Nicolas De Carli |
| 4 | + * |
| 5 | + * Based on 'wyhash', by Wang Yi <[email protected]> |
| 6 | + * |
| 7 | + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) |
| 8 | + * |
| 9 | + * Redistribution and use in source and binary forms, with or without |
| 10 | + * modification, are permitted provided that the following conditions are |
| 11 | + * met: |
| 12 | + * |
| 13 | + * * Redistributions of source code must retain the above copyright |
| 14 | + * notice, this list of conditions and the following disclaimer. |
| 15 | + * * Redistributions in binary form must reproduce the above |
| 16 | + * copyright notice, this list of conditions and the following disclaimer |
| 17 | + * in the documentation and/or other materials provided with the |
| 18 | + * distribution. |
| 19 | + * |
| 20 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | + * |
| 32 | + * You can contact the author at: |
| 33 | + * - rapidhash source repository: https://github.com/Nicoshev/rapidhash |
| 34 | + */ |
| 35 | + |
| 36 | +/* |
| 37 | + * Includes. |
| 38 | + */ |
| 39 | +#include <stdint.h> |
| 40 | +#include <string.h> |
| 41 | +#if defined(_MSC_VER) |
| 42 | + #include <intrin.h> |
| 43 | + #if defined(_M_X64) && !defined(_M_ARM64EC) |
| 44 | + #pragma intrinsic(_umul128) |
| 45 | + #endif |
| 46 | +#endif |
| 47 | + |
| 48 | +/* |
| 49 | + * C++ macros. |
| 50 | + * |
| 51 | + * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)). |
| 52 | + */ |
| 53 | +#ifdef __cplusplus |
| 54 | + #define RAPIDHASH_NOEXCEPT noexcept |
| 55 | + #define RAPIDHASH_CONSTEXPR constexpr |
| 56 | + #ifndef RAPIDHASH_INLINE |
| 57 | + #define RAPIDHASH_INLINE inline |
| 58 | + #endif |
| 59 | +#else |
| 60 | + #define RAPIDHASH_NOEXCEPT |
| 61 | + #define RAPIDHASH_CONSTEXPR static const |
| 62 | + #ifndef RAPIDHASH_INLINE |
| 63 | + #define RAPIDHASH_INLINE static inline |
| 64 | + #endif |
| 65 | +#endif |
| 66 | + |
| 67 | +/* |
| 68 | + * Protection macro, alters behaviour of rapid_mum multiplication function. |
| 69 | + * |
| 70 | + * RAPIDHASH_FAST: Normal behavior, max speed. |
| 71 | + * RAPIDHASH_PROTECTED: Extra protection against entropy loss. |
| 72 | + */ |
| 73 | +#ifndef RAPIDHASH_PROTECTED |
| 74 | + #define RAPIDHASH_FAST |
| 75 | +#elif defined(RAPIDHASH_FAST) |
| 76 | + #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously." |
| 77 | +#endif |
| 78 | + |
| 79 | +/* |
| 80 | + * Unrolling macros, changes code definition for main hash function. |
| 81 | + * |
| 82 | + * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes. |
| 83 | + * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes. |
| 84 | + * |
| 85 | + * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED. |
| 86 | + * |
| 87 | + * These macros do not alter the output hash. |
| 88 | + */ |
| 89 | +#ifndef RAPIDHASH_COMPACT |
| 90 | + #define RAPIDHASH_UNROLLED |
| 91 | +#elif defined(RAPIDHASH_UNROLLED) |
| 92 | + #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously." |
| 93 | +#endif |
| 94 | + |
| 95 | +/* |
| 96 | + * Likely and unlikely macros. |
| 97 | + */ |
| 98 | +#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) |
| 99 | + #define _likely_(x) __builtin_expect(x,1) |
| 100 | + #define _unlikely_(x) __builtin_expect(x,0) |
| 101 | +#else |
| 102 | + #define _likely_(x) (x) |
| 103 | + #define _unlikely_(x) (x) |
| 104 | +#endif |
| 105 | + |
| 106 | +/* |
| 107 | + * Endianness macros. |
| 108 | + */ |
| 109 | +#ifndef RAPIDHASH_LITTLE_ENDIAN |
| 110 | + #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) |
| 111 | + #define RAPIDHASH_LITTLE_ENDIAN |
| 112 | + #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) |
| 113 | + #define RAPIDHASH_BIG_ENDIAN |
| 114 | + #else |
| 115 | + #warning "could not determine endianness! Falling back to little endian." |
| 116 | + #define RAPIDHASH_LITTLE_ENDIAN |
| 117 | + #endif |
| 118 | +#endif |
| 119 | + |
| 120 | +/* |
| 121 | + * Default seed. |
| 122 | + */ |
| 123 | +#define RAPID_SEED (0xbdd89aa982704029ull) |
| 124 | + |
| 125 | +/* |
| 126 | + * Default secret parameters. |
| 127 | + */ |
| 128 | +RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; |
| 129 | + |
| 130 | +/* |
| 131 | + * 64*64 -> 128bit multiply function. |
| 132 | + * |
| 133 | + * @param A Address of 64-bit number. |
| 134 | + * @param B Address of 64-bit number. |
| 135 | + * |
| 136 | + * Calculates 128-bit C = *A * *B. |
| 137 | + * |
| 138 | + * When RAPIDHASH_FAST is defined: |
| 139 | + * Overwrites A contents with C's low 64 bits. |
| 140 | + * Overwrites B contents with C's high 64 bits. |
| 141 | + * |
| 142 | + * When RAPIDHASH_PROTECTED is defined: |
| 143 | + * Xors and overwrites A contents with C's low 64 bits. |
| 144 | + * Xors and overwrites B contents with C's high 64 bits. |
| 145 | + */ |
| 146 | +RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT { |
| 147 | +#if defined(__SIZEOF_INT128__) |
| 148 | + __uint128_t r=*A; r*=*B; |
| 149 | + #ifdef RAPIDHASH_PROTECTED |
| 150 | + *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); |
| 151 | + #else |
| 152 | + *A=(uint64_t)r; *B=(uint64_t)(r>>64); |
| 153 | + #endif |
| 154 | +#elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64)) |
| 155 | + #if defined(_M_X64) |
| 156 | + #ifdef RAPIDHASH_PROTECTED |
| 157 | + uint64_t a, b; |
| 158 | + a=_umul128(*A,*B,&b); |
| 159 | + *A^=a; *B^=b; |
| 160 | + #else |
| 161 | + *A=_umul128(*A,*B,B); |
| 162 | + #endif |
| 163 | + #else |
| 164 | + #ifdef RAPIDHASH_PROTECTED |
| 165 | + uint64_t a, b; |
| 166 | + b = __umulh(*A, *B); |
| 167 | + a = *A * *B; |
| 168 | + *A^=a; *B^=b; |
| 169 | + #else |
| 170 | + uint64_t c = __umulh(*A, *B); |
| 171 | + *A = *A * *B; |
| 172 | + *B = c; |
| 173 | + #endif |
| 174 | + #endif |
| 175 | +#else |
| 176 | + uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; |
| 177 | + uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl; |
| 178 | + lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c; |
| 179 | + #ifdef RAPIDHASH_PROTECTED |
| 180 | + *A^=lo; *B^=hi; |
| 181 | + #else |
| 182 | + *A=lo; *B=hi; |
| 183 | + #endif |
| 184 | +#endif |
| 185 | +} |
| 186 | + |
| 187 | +/* |
| 188 | + * Multiply and xor mix function. |
| 189 | + * |
| 190 | + * @param A 64-bit number. |
| 191 | + * @param B 64-bit number. |
| 192 | + * |
| 193 | + * Calculates 128-bit C = A * B. |
| 194 | + * Returns 64-bit xor between high and low 64 bits of C. |
| 195 | + */ |
| 196 | +RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; } |
| 197 | + |
| 198 | +/* |
| 199 | + * Read functions. |
| 200 | + */ |
| 201 | +#ifdef RAPIDHASH_LITTLE_ENDIAN |
| 202 | +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;} |
| 203 | +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;} |
| 204 | +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) |
| 205 | +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);} |
| 206 | +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);} |
| 207 | +#elif defined(_MSC_VER) |
| 208 | +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);} |
| 209 | +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);} |
| 210 | +#else |
| 211 | +RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { |
| 212 | + uint64_t v; memcpy(&v, p, 8); |
| 213 | + return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); |
| 214 | +} |
| 215 | +RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { |
| 216 | + uint32_t v; memcpy(&v, p, 4); |
| 217 | + return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); |
| 218 | +} |
| 219 | +#endif |
| 220 | + |
| 221 | +/* |
| 222 | + * Reads and combines 3 bytes of input. |
| 223 | + * |
| 224 | + * @param p Buffer to read from. |
| 225 | + * @param k Length of @p, in bytes. |
| 226 | + * |
| 227 | + * Always reads and combines 3 bytes from memory. |
| 228 | + * Guarantees to read each buffer position at least once. |
| 229 | + * |
| 230 | + * Returns a 64-bit value containing all three bytes read. |
| 231 | + */ |
| 232 | +RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];} |
| 233 | + |
| 234 | +/* |
| 235 | + * rapidhash main function. |
| 236 | + * |
| 237 | + * @param key Buffer to be hashed. |
| 238 | + * @param len @key length, in bytes. |
| 239 | + * @param seed 64-bit seed used to alter the hash result predictably. |
| 240 | + * @param secret Triplet of 64-bit secrets used to alter hash result predictably. |
| 241 | + * |
| 242 | + * Returns a 64-bit hash. |
| 243 | + */ |
| 244 | +RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT { |
| 245 | + const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b; |
| 246 | + if(_likely_(len<=16)){ |
| 247 | + if(_likely_(len>=4)){ |
| 248 | + const uint8_t * plast = p + len - 4; |
| 249 | + a = (rapid_read32(p) << 32) | rapid_read32(plast); |
| 250 | + const uint64_t delta = ((len&24)>>(len>>3)); |
| 251 | + b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); } |
| 252 | + else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;} |
| 253 | + else a=b=0; |
| 254 | + } |
| 255 | + else{ |
| 256 | + size_t i=len; |
| 257 | + if(_unlikely_(i>48)){ |
| 258 | + uint64_t see1=seed, see2=seed; |
| 259 | +#ifdef RAPIDHASH_UNROLLED |
| 260 | + while(_likely_(i>=96)){ |
| 261 | + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
| 262 | + see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
| 263 | + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
| 264 | + seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed); |
| 265 | + see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1); |
| 266 | + see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2); |
| 267 | + p+=96; i-=96; |
| 268 | + } |
| 269 | + if(_unlikely_(i>=48)){ |
| 270 | + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
| 271 | + see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
| 272 | + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
| 273 | + p+=48; i-=48; |
| 274 | + } |
| 275 | +#else |
| 276 | + do { |
| 277 | + seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); |
| 278 | + see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); |
| 279 | + see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); |
| 280 | + p+=48; i-=48; |
| 281 | + } while (_likely_(i>=48)); |
| 282 | +#endif |
| 283 | + seed^=see1^see2; |
| 284 | + } |
| 285 | + if(i>16){ |
| 286 | + seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]); |
| 287 | + if(i>32) |
| 288 | + seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed); |
| 289 | + } |
| 290 | + a=rapid_read64(p+i-16); b=rapid_read64(p+i-8); |
| 291 | + } |
| 292 | + a^=secret[1]; b^=seed; rapid_mum(&a,&b); |
| 293 | + return rapid_mix(a^secret[0]^len,b^secret[1]); |
| 294 | +} |
| 295 | + |
| 296 | +/* |
| 297 | + * rapidhash default seeded hash function. |
| 298 | + * |
| 299 | + * @param key Buffer to be hashed. |
| 300 | + * @param len @key length, in bytes. |
| 301 | + * @param seed 64-bit seed used to alter the hash result predictably. |
| 302 | + * |
| 303 | + * Calls rapidhash_internal using provided parameters and default secrets. |
| 304 | + * |
| 305 | + * Returns a 64-bit hash. |
| 306 | + */ |
| 307 | +RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT { |
| 308 | + return rapidhash_internal(key, len, seed, rapid_secret); |
| 309 | +} |
| 310 | + |
| 311 | +/* |
| 312 | + * rapidhash default hash function. |
| 313 | + * |
| 314 | + * @param key Buffer to be hashed. |
| 315 | + * @param len @key length, in bytes. |
| 316 | + * |
| 317 | + * Calls rapidhash_withSeed using provided parameters and the default seed. |
| 318 | + * |
| 319 | + * Returns a 64-bit hash. |
| 320 | + */ |
| 321 | +RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT { |
| 322 | + return rapidhash_withSeed(key, len, RAPID_SEED); |
| 323 | +} |
0 commit comments