Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit b757a50

Browse files
Reini Urbanrurban
Reini Urban
authored and committed
HeArray: remove hek_hash and refcounted_he_hash
Calculate hashes on demand instead of storing them in a HEK, which makes the HEK shorter so that more entries fit into a cache line. HEK_HASH(hek) is now invalid and gone. Use the new HeHASH_calc(he), HEK_HASH_calc(hek), SvSHARED_HASH_calc(sv) instead. See http://www.ilikebigbits.com/blog/2016/8/28/designing-a-fast-hash-table for benchmarks (HashCache). Using 4 tests in the hot hash loop also does not make much sense when checking the length and the string is enough to weed out collisions. This strategy, recomputing the hash when needed, is so far 1-7% slower, but we hope to get back up to speed with the HeARRAY patch. See below. The end goal is to get rid of linked lists and store the collisions inlined in consecutive memory, in a HekARRAY: (len,cmp-flags,char*,other-flags,val). Measurements in "Cache-Conscious Collision Resolution in String Hash Tables" by Nikolas Askitis and Justin Zobel, Melbourne 2005, show that this is the fastest strategy for Open Hashing (chained) tables. See GH #24 and GH #102. The next idea is to use MSB varint encoding of the string length in a HEK, because our strings are usually short: len < 63 fits into one byte. We can then merge it with the cmp-flags, the flags only needed for comparison. See https://techoverflow.net/blog/2013/01/25/efficiently-encoding-variable-length-integers-in-cc/ or just <63 one byte, >63 MSB: I32 len. Note that the 1st MSB bit is already taken for UTF8.
1 parent f8515f9 commit b757a50

File tree

9 files changed

+136
-85
lines changed

9 files changed

+136
-85
lines changed

dump.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -2707,7 +2707,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
27072707
}
27082708
count++;
27092709

2710-
hash = HeHASH(he);
2710+
/*hash = HeHASH(he);*/
27112711
keysv = hv_iterkeysv(he);
27122712
keypv = SvPV_const(keysv, len);
27132713
elt = HeVAL(he);
@@ -2720,7 +2720,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
27202720
UNI_DISPLAY_QQ));
27212721
if (HvEITER_get(hv) == he)
27222722
PerlIO_printf(file, "[CURRENT] ");
2723-
PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);
2723+
/*PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);*/
27242724
do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
27252725
}
27262726
}
@@ -3266,11 +3266,13 @@ Perl_deb_hek(pTHX_ HEK* hek, SV* val)
32663266
else if (HEK_IS_SVKEY(hek)) {
32673267
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
32683268
SV* sv = *(SV**)HEK_KEY(hek);
3269-
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)HEK_HASH(hek),
3269+
U32 hash = HEK_HASH_calc(hek);
3270+
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)hash,
32703271
pretty_pv_escape( tmp, SvPVX_const(sv), SvCUR(sv), SvUTF8(sv)));
32713272
} else {
32723273
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
3273-
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)HEK_HASH(hek),
3274+
U32 hash = HEK_HASH_calc(hek);
3275+
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)hash,
32743276
pretty_pv_escape( tmp, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek)));
32753277
if (HEK_FLAGS(hek) > 1)
32763278
PerlIO_printf(Perl_debug_log, "0x%x ", HEK_FLAGS(hek));

embed.fnc

+4
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,7 @@ AbmdRp |bool |hv_exists |NULLOK HV *hv|NN const char *key|I32 klen
744744
AbmdRp |bool |hv_exists_ent |NULLOK HV *hv|NN SV *keysv|U32 hash
745745
Abmdp |SV** |hv_fetch |NULLOK HV *hv|NN const char *key|I32 klen \
746746
|I32 lval
747+
: hash ignored
747748
Abmdp |HE* |hv_fetch_ent |NULLOK HV *hv|NN SV *keysv|I32 lval|U32 hash
748749
#if defined(USE_CPERL)
749750
Ap |void* |hv_common |NULLOK HV *hv|NULLOK SV *keysv \
@@ -2127,6 +2128,7 @@ Abpd |SSize_t|unpack_str |NN const char *pat|NN const char *patend|NN const char
21272128
|I32 ocnt|U32 flags
21282129
Apd |SSize_t|unpackstring |NN const char *pat|NN const char *patend|NN const char *s \
21292130
|NN const char *strend|U32 flags
2131+
: hash ignored
21302132
Ap |void |unsharepvn |NULLOK const char* sv|I32 len|U32 hash
21312133
: Used in gv.c, hv.c
21322134
#if defined(USE_CPERL)
@@ -2523,10 +2525,12 @@ s |void |hsplit |NN HV *hv|STRLEN const oldsize|STRLEN newsize
25232525
s |void |hv_free_entries|NN HV *hv
25242526
s |SV* |hv_free_ent_ret|NN HV *hv|NN HE *entry
25252527
sR |HE* |new_he
2528+
: hash ignored
25262529
sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags
25272530
sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store
25282531
s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
25292532
# if defined(USE_CPERL)
2533+
: hash mandatory
25302534
sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
25312535
# else
25322536
: a perl5 security risk

ext/B/B.xs

+9
Original file line numberDiff line numberDiff line change
@@ -2362,6 +2362,15 @@ HeKEY(he)
23622362
U32
23632363
HeHASH(he)
23642364
B::HE he
2365+
PREINIT:
2366+
U32 hash = 0;
2367+
HEK *hek;
2368+
CODE:
2369+
hek = HeKEY_hek(he);
2370+
PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek));
2371+
RETVAL = hash;
2372+
OUTPUT:
2373+
RETVAL
23652374

23662375
I32
23672376
HeKLEN(he)

0 commit comments

Comments
 (0)