Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit 68149b3

Browse files
Reini Urbanrurban
Reini Urban
authored and committed
HeArray: remove hek_hash and refcounted_he_hash
Calculate hashes on demand, but do not store them in a HEK, to make the HEK shorter so that more entries fit into a cache line. HEK_HASH(hek) is now invalid and gone. Use the new HeHASH_calc(he), HEK_HASH_calc(hek), SvSHARED_HASH_calc(sv) instead. See http://www.ilikebigbits.com/blog/2016/8/28/designing-a-fast-hash-table for benchmarks (HashCache). Also, using 4 tests in the hot hash loop does not make much sense when checking the length and the string is enough to weed out collisions. This strategy, recomputing the hash when needed, is so far 1-7% slower, but we hope to get up to speed with the HeARRAY patch. See below. The end goal is to get rid of linked lists and store the collisions inlined in consecutive memory, in a HekARRAY: (len,cmp-flags,char*,other-flags,val). Measurements in "Cache-Conscious Collision Resolution in String Hash Tables" by Nikolas Askitis and Justin Zobel, Melbourne 2005, show that this is the fastest strategy for Open Hashing (chained) tables. See GH #24 and GH #102. The next idea is to use MSB varint encoding of the str length in a HEK, because our strings are usually short: len < 63 fits into one byte. We can then merge it with the cmp-flags, the flags only needed for comparison. See https://techoverflow.net/blog/2013/01/25/efficiently-encoding-variable-length-integers-in-cc/ or just use: <63 one byte, >63 MSB: I32 len. Note that the 1st MSB bit is already taken for UTF8.
1 parent dd6966a commit 68149b3

File tree

9 files changed

+136
-85
lines changed

9 files changed

+136
-85
lines changed

dump.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -2709,7 +2709,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
27092709
}
27102710
count++;
27112711

2712-
hash = HeHASH(he);
2712+
/*hash = HeHASH(he);*/
27132713
keysv = hv_iterkeysv(he);
27142714
keypv = SvPV_const(keysv, len);
27152715
elt = HeVAL(he);
@@ -2722,7 +2722,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
27222722
UNI_DISPLAY_QQ));
27232723
if (HvEITER_get(hv) == he)
27242724
PerlIO_printf(file, "[CURRENT] ");
2725-
PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);
2725+
/*PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);*/
27262726
do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
27272727
}
27282728
}
@@ -3268,11 +3268,13 @@ Perl_deb_hek(pTHX_ HEK* hek, SV* val)
32683268
else if (HEK_IS_SVKEY(hek)) {
32693269
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
32703270
SV* sv = *(SV**)HEK_KEY(hek);
3271-
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)HEK_HASH(hek),
3271+
U32 hash = HEK_HASH_calc(hek);
3272+
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)hash,
32723273
pretty_pv_escape( tmp, SvPVX_const(sv), SvCUR(sv), SvUTF8(sv)));
32733274
} else {
32743275
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
3275-
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)HEK_HASH(hek),
3276+
U32 hash = HEK_HASH_calc(hek);
3277+
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)hash,
32763278
pretty_pv_escape( tmp, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek)));
32773279
if (HEK_FLAGS(hek) > 1)
32783280
PerlIO_printf(Perl_debug_log, "0x%x ", HEK_FLAGS(hek));

embed.fnc

+4
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,7 @@ AbmdRp |bool |hv_exists |NULLOK HV *hv|NN const char *key|I32 klen
744744
AbmdRp |bool |hv_exists_ent |NULLOK HV *hv|NN SV *keysv|U32 hash
745745
Abmdp |SV** |hv_fetch |NULLOK HV *hv|NN const char *key|I32 klen \
746746
|I32 lval
747+
: hash ignored
747748
Abmdp |HE* |hv_fetch_ent |NULLOK HV *hv|NN SV *keysv|I32 lval|U32 hash
748749
#if defined(USE_CPERL)
749750
Ap |void* |hv_common |NULLOK HV *hv|NULLOK SV *keysv \
@@ -2129,6 +2130,7 @@ Abpd |SSize_t|unpack_str |NN const char *pat|NN const char *patend|NN const char
21292130
|I32 ocnt|U32 flags
21302131
Apd |SSize_t|unpackstring |NN const char *pat|NN const char *patend|NN const char *s \
21312132
|NN const char *strend|U32 flags
2133+
: hash ignored
21322134
Ap |void |unsharepvn |NULLOK const char* sv|I32 len|U32 hash
21332135
: Used in gv.c, hv.c
21342136
#if defined(USE_CPERL)
@@ -2525,10 +2527,12 @@ s |void |hsplit |NN HV *hv|STRLEN const oldsize|STRLEN newsize
25252527
s |void |hv_free_entries|NN HV *hv
25262528
s |SV* |hv_free_ent_ret|NN HV *hv|NN HE *entry
25272529
sR |HE* |new_he
2530+
: hash ignored
25282531
sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags
25292532
sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store
25302533
s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
25312534
# if defined(USE_CPERL)
2535+
: hash mandatory
25322536
sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
25332537
# else
25342538
: a perl5 security risk

ext/B/B.xs

+9
Original file line numberDiff line numberDiff line change
@@ -2362,6 +2362,15 @@ HeKEY(he)
23622362
U32
23632363
HeHASH(he)
23642364
B::HE he
2365+
PREINIT:
2366+
U32 hash = 0;
2367+
HEK *hek;
2368+
CODE:
2369+
hek = HeKEY_hek(he);
2370+
PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek));
2371+
RETVAL = hash;
2372+
OUTPUT:
2373+
RETVAL
23652374

23662375
I32
23672376
HeKLEN(he)

0 commit comments

Comments
 (0)