From 7a82561c8f01868b491c3d4807ed26376dada3c6 Mon Sep 17 00:00:00 2001 From: Reini Urban Date: Tue, 1 Mar 2016 11:01:23 +0100 Subject: [PATCH] HeArray: remove hek_hash and refcounted_he_hash Calculate hashes on demand, but do not store them in a HEK, to make the HEK shorter and fit more entries into a cache line. HEK_HASH(hek) is now invalid and gone. Use the new HeHASH_calc(he), HEK_HASH_calc(hek), SvSHARED_HASH_calc(sv) instead. See http://www.ilikebigbits.com/blog/2016/8/28/designing-a-fast-hash-table for benchmarks (HashCache). Using 4 tests in the hot hash loop also does not make much sense when checking the length and the string is enough to weed out collisions. This strategy, recomputing the hash when needed, is so far 1-7% slower, but we hope to get back up to speed with the HeARRAY patch. See below. The end goal is to get rid of linked lists and store the collisions inlined in consecutive memory, in a HekARRAY. (len,cmp-flags,char*,other-flags,val) Measurements in "Cache-Conscious Collision Resolution in String Hash Tables" by Nikolas Askitis and Justin Zobel, Melbourne 2005 show that this is the fastest strategy for Open Hashing (chained) tables. See GH #24 and GH #102 The next idea is to use MSB varint encoding of the str length in a HEK, because our strings are usually short, len < 63, fits into one byte. We can then merge it with the cmp-flags, the flags only needed for comparison. See https://techoverflow.net/blog/2013/01/25/efficiently-encoding-variable-length-integers-in-cc/ or just <63 one byte, >63 MSB: I32 len. Note that the 1st MSB bit is already taken for UTF8. 
--- dump.c | 10 ++-- embed.fnc | 4 ++ ext/B/B.xs | 9 +++ hv.c | 148 ++++++++++++++++++++++++++--------------------- hv.h | 25 +++++--- mro_core.c | 6 +- pod/perlguts.pod | 7 ++- sv.h | 10 +++- universal.c | 2 +- 9 files changed, 136 insertions(+), 85 deletions(-) diff --git a/dump.c b/dump.c index ce9146af813..b2906ee20a1 100644 --- a/dump.c +++ b/dump.c @@ -2707,7 +2707,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, } count++; - hash = HeHASH(he); + /*hash = HeHASH(he);*/ keysv = hv_iterkeysv(he); keypv = SvPV_const(keysv, len); elt = HeVAL(he); @@ -2720,7 +2720,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, UNI_DISPLAY_QQ)); if (HvEITER_get(hv) == he) PerlIO_printf(file, "[CURRENT] "); - PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash); + /*PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);*/ do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim); } } @@ -3264,11 +3264,13 @@ Perl_deb_hek(pTHX_ HEK* hek, SV* val) else if (HEK_IS_SVKEY(hek)) { SV * const tmp = newSVpvs_flags("", SVs_TEMP); SV* sv = *(SV**)HEK_KEY(hek); - PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)HEK_HASH(hek), + U32 hash = HEK_HASH_calc(hek); + PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)hash, pretty_pv_escape( tmp, SvPVX_const(sv), SvCUR(sv), SvUTF8(sv))); } else { SV * const tmp = newSVpvs_flags("", SVs_TEMP); - PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)HEK_HASH(hek), + U32 hash = HEK_HASH_calc(hek); + PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)hash, pretty_pv_escape( tmp, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek))); if (HEK_FLAGS(hek) > 1) PerlIO_printf(Perl_debug_log, "0x%x ", HEK_FLAGS(hek)); diff --git a/embed.fnc b/embed.fnc index 3fcf0e242c1..86e9f307db2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -743,6 +743,7 @@ AbmdRp |bool |hv_exists |NULLOK HV *hv|NN const char *key|I32 klen AbmdRp |bool |hv_exists_ent |NULLOK HV *hv|NN SV 
*keysv|U32 hash Abmdp |SV** |hv_fetch |NULLOK HV *hv|NN const char *key|I32 klen \ |I32 lval +: hash ignored Abmdp |HE* |hv_fetch_ent |NULLOK HV *hv|NN SV *keysv|I32 lval|U32 hash #if defined(USE_CPERL) Ap |void* |hv_common |NULLOK HV *hv|NULLOK SV *keysv \ @@ -2121,6 +2122,7 @@ Abpd |SSize_t|unpack_str |NN const char *pat|NN const char *patend|NN const char |I32 ocnt|U32 flags Apd |SSize_t|unpackstring |NN const char *pat|NN const char *patend|NN const char *s \ |NN const char *strend|U32 flags +: hash ignored Ap |void |unsharepvn |NULLOK const char* sv|I32 len|U32 hash : Used in gv.c, hv.c #if defined(USE_CPERL) @@ -2517,10 +2519,12 @@ s |void |hsplit |NN HV *hv|STRLEN const oldsize|STRLEN newsize s |void |hv_free_entries|NN HV *hv s |SV* |hv_free_ent_ret|NN HV *hv|NN HE *entry sR |HE* |new_he +: hash ignored sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash # if defined(USE_CPERL) +: hash mandatory sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags # else : a perl5 security risk diff --git a/ext/B/B.xs b/ext/B/B.xs index ae7feb1757f..b23edc54f93 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -2362,6 +2362,15 @@ HeKEY(he) U32 HeHASH(he) B::HE he + PREINIT: + U32 hash = 0; + HEK *hek; + CODE: + hek = HeKEY_hek(he); + PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek)); + RETVAL = hash; + OUTPUT: + RETVAL I32 HeKLEN(he) diff --git a/hv.c b/hv.c index 7cf0a4c5b20..68651d1e0d5 100644 --- a/hv.c +++ b/hv.c @@ -136,7 +136,7 @@ S_save_hek_flags(const char *str, I32 len, U32 hash, int flags) Copy(str, HEK_KEY(hek), len, char); HEK_KEY(hek)[len] = 0; HEK_LEN(hek) = len; - HEK_HASH(hek) = hash; + /*HEK_HASH(hek) = hash;*/ HEK_FLAGS(hek) = (unsigned char)flags_masked | HVhek_UNSHARED; if (flags & HVhek_FREEKEY) @@ -178,8 +178,9 @@ Perl_hek_dup(pTHX_ HEK *source, CLONE_PARAMS* 
param) (void)share_hek_hek(shared); } else { + PERL_HASH(hash, HEK_KEY(source), HEK_LEN(source)); shared = share_hek_flags(HEK_KEY(source), HEK_LEN(source), - HEK_HASH(source), HEK_FLAGS(source)); + hash, HEK_FLAGS(source)); ptr_table_store(PL_ptr_table, source, shared); } return shared; @@ -189,6 +190,7 @@ HE * Perl_he_dup(pTHX_ const HE *e, bool shared, CLONE_PARAMS* param) { HE *ret; + U32 hash; PERL_ARGS_ASSERT_HE_DUP; @@ -221,15 +223,18 @@ Perl_he_dup(pTHX_ const HE *e, bool shared, CLONE_PARAMS* param) (void)share_hek_hek(shared); } else { + PERL_HASH(hash, HEK_KEY(source), HEK_LEN(source)); shared = share_hek_flags(HEK_KEY(source), HEK_LEN(source), - HEK_HASH(source), HEK_FLAGS(source)); + hash, HEK_FLAGS(source)); ptr_table_store(PL_ptr_table, source, shared); } HeKEY_hek(ret) = shared; } - else - HeKEY_hek(ret) = save_hek_flags(HeKEY(e), HeKLEN(e), HeHASH(e), + else { + PERL_HASH(hash, HEK_KEY(source), HEK_LEN(source)); + HeKEY_hek(ret) = save_hek_flags(HeKEY(e), HeKLEN(e), 0/*HeHASH(e)*/, HeKFLAGS(e)); + } HeVAL(ret) = sv_dup_inc(HeVAL(e), param); return ret; } @@ -545,7 +550,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, || (SvRMAGICAL((const SV *)hv) && mg_find((const SV *)hv, PERL_MAGIC_env)) #endif - ) { + ) { char *array; Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(xhv->xhv_max+1), @@ -593,18 +598,19 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, if (keysv && (SvIsCOW_shared_hash(keysv))) { if (HvSHAREKEYS(hv)) { keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + PERL_HASH(hash, HEK_KEY(keysv_hek), HEK_LEN(keysv_hek)); DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH SHAREKEYS \t%s{%.*s SV}\n", HvNAME_get(hv)?HvNAME_get(hv):"", (int)HEK_LEN(keysv_hek), HEK_KEY(keysv_hek))); } else { + PERL_HASH(hash, SvPVX_const(keysv), SvCUR(keysv)); DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH \t\t%s{%.*s SV 0x%x}\n", HvNAME_get(hv)?HvNAME_get(hv):"", (int)SvCUR(keysv), SvPVX_const(keysv), 
HEK_FLAGS(SvSHARED_HEK_FROM_PV(SvPVX_const(keysv))))); } - hash = SvSHARED_HASH(keysv); } else if (!hash) PERL_HASH(hash, key, klen); @@ -661,8 +667,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, for (; entry; entry = HeNEXT(entry)) { CHECK_HASH_FLOOD(collisions) - if (HeHASH(entry) != hash) /* strings can't be equal */ - continue; + /*if (HeHASH(entry) != hash) / * strings can't be equal * / + continue;*/ if (HeKLEN(entry) != klen) continue; if (memNE(HeKEY(entry),key,klen)) /* is this it? */ @@ -1398,7 +1404,7 @@ Perl_hv_bucket_ratio(pTHX_ HV *hv) } else sv = SV_ZERO; - + return sv; } @@ -1512,17 +1518,18 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, if (keysv && (SvIsCOW_shared_hash(keysv))) { if (HvSHAREKEYS(hv)) { keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); + PERL_HASH(hash, HEK_KEY(keysv_hek), HEK_LEN(keysv_hek)); DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH SHAREKEYS + shared_hash\tdelete %s{%.*s}\n", HvNAME_get(hv)?HvNAME_get(hv):"", (int)HEK_LEN(keysv_hek), HEK_KEY(keysv_hek))); } else { + PERL_HASH(hash, SvPVX_const(keysv), SvCUR(keysv)); DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH shared_hash\t\tdelete %s{%.*s}\n", HvNAME_get(hv)?HvNAME_get(hv):"", (int)SvCUR(keysv), SvPVX_const(keysv))); } - hash = SvSHARED_HASH(keysv); } else if (!hash) PERL_HASH(hash, key, klen); @@ -1549,7 +1556,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, int keysv_flags = HEK_FLAGS(keysv_hek); for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { - HEK *hek = HeKEY_hek(entry); + const HEK *hek = HeKEY_hek(entry); DEBUG_H(collisions++); if (hek == keysv_hek) goto found; @@ -1564,14 +1571,13 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, } for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { + const HEK *hek = HeKEY_hek(entry); CHECK_HASH_FLOOD(collisions) - if (HeHASH(entry) != hash) /* strings can't be equal */ - continue; - if (HeKLEN(entry) != klen) - 
continue; - if (memNE(HeKEY(entry),key,klen)) /* is this it? */ - continue; - if ((HeKFLAGS(entry) ^ masked_flags) & HVhek_UTF8) + if (HEK_LEN(hek) != klen) + continue; + if (memNE(HEK_KEY(hek),key,klen)) /* is this it? */ + continue; + if ((HEK_FLAGS(hek) ^ masked_flags) & HVhek_UTF8) continue; found: @@ -1598,7 +1604,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, if (k_flags & HVhek_FREEKEY) Safefree(key); - /* If this is a stash and the key ends with ::, then someone is + /* If this is a stash and the key ends with ::, then someone is * deleting a package. */ if (HeVAL(entry) && HvENAME_get(hv)) { @@ -1869,7 +1875,11 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) HE *entry = aep[i]; while (entry) { /* non-existent */ - U32 j = (HeHASH(entry) & newmax); + U32 hash; + U32 j; + HEK *hek = HeKEY_hek(entry); + PERL_HASH(hash,HEK_KEY(hek),HEK_LEN(hek)); + j = hash & newmax; /* was HeHASH */ #ifdef DEBUGGING if (DEBUG_H_TEST_ && DEBUG_v_TEST_) { PerlIO_printf(Perl_debug_log, "HASH split %u->%u\n",(unsigned)i,(unsigned)j); @@ -1884,7 +1894,7 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) * and use the new low bit to decide if we insert at top, * or next from top. IOW, we only rotate on a collision.*/ if (aep[j] && PL_HASH_RAND_BITS_ENABLED) { - PL_hash_rand_bits+= ROTL32(HeHASH(entry), 17); + PL_hash_rand_bits+= ROTL32(hash, 17); PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1); if (PL_hash_rand_bits & 1) { HeNEXT(entry)= HeNEXT(aep[j]); @@ -2060,11 +2070,15 @@ Perl_newHVhv(pTHX_ HV *ohv) const HEK *hek = HeKEY_hek(oent); HE * const ent = new_HE(); SV * const val = HeVAL(oent); + U32 hash = 0; + if (shared) { + PERL_HASH(hash,HEK_KEY(hek),HEK_LEN(hek)); + } HeVAL(ent) = SvIMMORTAL(val) ? val : newSVsv(val); HeKEY_hek(ent) = shared - ? share_hek_flags(HEK_KEY(hek), HEK_LEN(hek), HEK_HASH(hek), HEK_FLAGS(hek)) - : save_hek_flags(HEK_KEY(hek), HEK_LEN(hek), HEK_HASH(hek), HEK_FLAGS(hek)); + ? 
share_hek_flags(HEK_KEY(hek), HEK_LEN(hek), hash, HEK_FLAGS(hek)) + : save_hek_flags(HEK_KEY(hek), HEK_LEN(hek), hash, HEK_FLAGS(hek)); if (prev) HeNEXT(prev) = ent; else @@ -2096,7 +2110,7 @@ Perl_newHVhv(pTHX_ HV *ohv) (void)hv_store_ent(hv, keysv, val, 0); else (void)hv_store_flags(hv, HeKEY(entry), HeKLEN(entry), val, - HeHASH(entry), HeKFLAGS(entry)); + 0/*HeHASH(entry)*/, HeKFLAGS(entry)); } HvRITER_set(ohv, riter); HvEITER_set(ohv, eiter); @@ -2145,7 +2159,7 @@ Perl_hv_copy_hints_hv(pTHX_ HV *const ohv) (void)hv_store_ent(hv, heksv, sv, 0); else { (void)hv_common(hv, heksv, HeKEY(entry), HeKLEN(entry), - HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, HeHASH(entry)); + HeKFLAGS(entry), HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, sv, 0); SvREFCNT_dec_NN(heksv); } } @@ -3051,7 +3065,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) HEK **victim = namep + (count < 0 ? -count : count); while (victim-- > namep + 1) if ( - (HEK_UTF8(*victim) || (flags & SVf_UTF8)) + (HEK_UTF8(*victim) || (flags & SVf_UTF8)) ? hek_eq_pvn_flags(aTHX_ *victim, name, (I32)len, flags) : (HEK_LEN(*victim) == (I32)len && memEQ(HEK_KEY(*victim), name, len)) ) { @@ -3075,7 +3089,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) return; } if ( - count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8)) + count > 0 && ((HEK_UTF8(*namep) || (flags & SVf_UTF8)) ? hek_eq_pvn_flags(aTHX_ *namep, name, (I32)len, flags) : (HEK_LEN(*namep) == (I32)len && memEQ(HEK_KEY(*namep), name, len)) ) @@ -3084,7 +3098,7 @@ Perl_hv_ename_delete(pTHX_ HV *hv, const char *name, U32 len, U32 flags) } } else if( - (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8)) + (HEK_UTF8(aux->xhv_name_u.xhvnameu_name) || (flags & SVf_UTF8)) ? 
hek_eq_pvn_flags(aTHX_ aux->xhv_name_u.xhvnameu_name, name, (I32)len, flags) : (HEK_LEN(aux->xhv_name_u.xhvnameu_name) == (I32)len && memEQ(HEK_KEY(aux->xhv_name_u.xhvnameu_name), name, len)) @@ -3513,17 +3527,23 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) return; } - hash = HEK_HASH(hek); - } else if (len < 0) { - STRLEN tmplen = -len; - is_utf8 = TRUE; - /* See the note in hv_fetch(). --jhi */ - str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8); - len = tmplen; - if (is_utf8) - k_flags = HVhek_UTF8; - if (str != save) - k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY; + /*hash = HEK_HASH(hek);*/ + PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek)); + } else { + if (len < 0) { + STRLEN tmplen = -len; + is_utf8 = TRUE; + /* See the note in hv_fetch(). --jhi */ + str = (char*)bytes_from_utf8((U8*)str, &tmplen, &is_utf8); + len = tmplen; + PERL_HASH(hash, str, len); + if (is_utf8) + k_flags = HVhek_UTF8; + if (str != save) + k_flags |= HVhek_WASUTF8 | HVhek_FREEKEY; + } else { + PERL_HASH(hash, str, len); + } } /* what follows was the moral equivalent of: @@ -3543,11 +3563,9 @@ S_unshare_hek_or_pvn(pTHX_ const HEK *hek, const char *str, I32 len, U32 hash) } else { const int flags_masked = k_flags & HVhek_MASK; for (entry = *oentry; entry; oentry = &HeNEXT(entry), entry = *oentry) { - if (HeHASH(entry) != hash) /* strings can't be equal */ - continue; if (HeKLEN(entry) != len) continue; - if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? 
*/ + if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) continue; if (HeKFLAGS(entry) != flags_masked) continue; @@ -3660,8 +3678,6 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) entry = HvARRAY(PL_strtab)[hindex]; for (; entry; entry = HeNEXT(entry)) { CHECK_HASH_FLOOD(collisions) - if (HeHASH(entry) != hash) /* strings can't be equal */ - continue; if (HeKLEN(entry) != len) continue; if (HeKEY(entry) != str && memNE(HeKEY(entry),str,len)) /* is this it? */ @@ -3702,7 +3718,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) Copy(str, HEK_KEY(hek), len, char); HEK_KEY(hek)[len] = 0; HEK_LEN(hek) = len; - HEK_HASH(hek) = hash; + /*HEK_HASH(hek) = hash;*/ HEK_FLAGS(hek) = (unsigned char)flags_masked; /* Still "point" to the HEK, so that other code need not know what @@ -3878,17 +3894,18 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) placeholders = 0; while (chain) { -#ifdef USE_ITHREADS - U32 hash = chain->refcounted_he_hash; -#else - U32 hash = HEK_HASH(chain->refcounted_he_hek); -#endif - HE **oentry = &HvARRAY(hv)[ HvHASH_INDEX(hash, max) ]; - HE *entry = *oentry; - SV *value; + HEK* hek = chain->refcounted_he_hek; + HE *entry; + HE **oentry; + SV *value; + U32 hash; + + PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek)); + oentry = &((HvARRAY(hv))[hash & max]); + entry = *oentry; for (; entry; entry = HeNEXT(entry)) { - if (HeHASH(entry) == hash) { + /* if (HeHASH(entry) == hash) { */ /* We might have a duplicate key here. If so, entry is older than the key we've already put in the hash, so if they are the same, skip adding entry. 
*/ @@ -3909,8 +3926,7 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) HeKLEN(entry))) goto next_please; #endif - } - CHECK_HASH_FLOOD(collisions) + CHECK_HASH_FLOOD(collisions) } assert (!entry); entry = new_HE(); @@ -3919,7 +3935,7 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) HeKEY_hek(entry) = share_hek_flags(REF_HE_KEY(chain), chain->refcounted_he_keylen, - chain->refcounted_he_hash, + hash, (chain->refcounted_he_data[0] & (HVhek_UTF8|HVhek_WASUTF8))); #else @@ -4023,12 +4039,12 @@ Perl_refcounted_he_fetch_pvn(pTHX_ const struct refcounted_he *chain, for (; chain; chain = chain->refcounted_he_next) { if ( #ifdef USE_ITHREADS - hash == chain->refcounted_he_hash && + /*hash == chain->refcounted_he_hash &&*/ keylen == chain->refcounted_he_keylen && memEQ(REF_HE_KEY(chain), keypv, keylen) && utf8_flag == (chain->refcounted_he_data[0] & HVhek_UTF8) #else - hash == HEK_HASH(chain->refcounted_he_hek) && + /*hash == HEK_HASH(chain->refcounted_he_hek) &&*/ keylen == (STRLEN)HEK_LEN(chain->refcounted_he_hek) && memEQ(HEK_KEY(chain->refcounted_he_hek), keypv, keylen) && utf8_flag == (HEK_FLAGS(chain->refcounted_he_hek) & HVhek_UTF8) @@ -4084,8 +4100,8 @@ Perl_refcounted_he_fetch_sv(pTHX_ const struct refcounted_he *chain, keypv = SvPV_const(key, keylen); if (SvUTF8(key)) flags |= REFCOUNTED_HE_KEY_UTF8; - if (!hash && SvIsCOW_shared_hash(key)) - hash = SvSHARED_HASH(key); + /*if (!hash && SvIsCOW_shared_hash(key)) + hash = SvSHARED_HASH(key);*/ return refcounted_he_fetch_pvn(chain, keypv, keylen, hash, flags); } @@ -4220,7 +4236,7 @@ Perl_refcounted_he_new_pvn(pTHX_ struct refcounted_he *parent, } #ifdef USE_ITHREADS - he->refcounted_he_hash = hash; + /*he->refcounted_he_hash = hash;*/ he->refcounted_he_keylen = keylen; Copy(keypv, he->refcounted_he_data + key_offset, keylen, char); #else @@ -4272,8 +4288,8 @@ Perl_refcounted_he_new_sv(pTHX_ struct refcounted_he *parent, keypv = SvPV_const(key, 
keylen); if (SvUTF8(key)) flags |= REFCOUNTED_HE_KEY_UTF8; - if (!hash && SvIsCOW_shared_hash(key)) - hash = SvSHARED_HASH(key); + /*if (!hash && SvIsCOW_shared_hash(key)) + hash = SvSHARED_HASH(key);*/ return refcounted_he_new_pvn(parent, keypv, keylen, hash, value, flags); } @@ -4303,7 +4319,7 @@ Perl_refcounted_he_free(pTHX_ struct refcounted_he *he) { HINTS_REFCNT_LOCK; new_count = --he->refcounted_he_refcnt; HINTS_REFCNT_UNLOCK; - + if (new_count) { return; } diff --git a/hv.h b/hv.h index 38f60fb0af6..b733646bcd4 100644 --- a/hv.h +++ b/hv.h @@ -50,7 +50,6 @@ struct he { /* hash key -- defined separately for use as shared pointer */ struct hek { - U32 hek_hash; /* hash of key */ I32 hek_len; /* length of hash key */ #if defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN) || defined(__SUNPRO_C) char hek_key[1]; /* variable-length hash key */ @@ -59,7 +58,7 @@ struct hek { #endif /* the hash-key is \0-terminated */ /* after the \0 there is a byte for flags, such as whether the key - is UTF-8 */ + is UTF-8. See HVhek_* */ }; struct shared_he { @@ -295,7 +294,9 @@ to. =for apidoc Am|U32|HeHASH|HE* he -Returns the computed hash stored in the hash entry. +Invalid. The computed hash is not stored in the hash entry anymore. +Within PERL_CORE a call to HeHASH will assert, outside PERL_CORE C<0> +is returned. To compute the hash use C. =for apidoc Am|char*|HePV|HE* he|STRLEN len @@ -510,7 +511,8 @@ C. #define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he)) #define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he)) #define HeVAL(he) (he)->he_valu.hent_val -#define HeHASH(he) HEK_HASH(HeKEY_hek(he)) +#define HeHASH(he) 0 +#define HeHASH_calc(he) HEK_HASH_calc(HeKEY_hek(he)) /* Here we require a STRLEN lp */ #define HePV(he,lp) ((HeKLEN(he) == HEf_SVKEY) ? \ SvPV(HeKEY_sv(he),lp) : \ @@ -534,7 +536,12 @@ C. # define Nullhek Null(HEK*) #endif #define HEK_BASESIZE STRUCT_OFFSET(HEK, hek_key[0]) -#define HEK_HASH(hek) (hek)->hek_hash +/* This is gone. 
You can compute it with he _calc macros, but is mostly + not needed. */ +#ifdef PERL_CORE +#define HEK_HASH(hek) assert(0 && "no HEK_HASH in core") +#endif +#define HEK_HASH_calc(hek) PERL_HASH(hash,HEK_KEY(hek),HEK_LEN(hek)) #define HEK_LEN(hek) (hek)->hek_len #define HEK_KEY(hek) (hek)->hek_key #define HEK_FLAGS(hek) (*((unsigned char *)(HEK_KEY(hek))+HEK_LEN(hek)+1)) @@ -690,17 +697,17 @@ C. # define hv_storehek(hv, hek, val) \ hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ - HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, (val), HEK_HASH(hek)) + HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, (val), 0) # define hv_fetchhek(hv, hek, lval) \ ((SV **) \ hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ (lval) \ ? (HV_FETCH_JUST_SV | HV_FETCH_LVALUE) \ : HV_FETCH_JUST_SV, \ - NULL, HEK_HASH(hek))) + NULL, 0)) # define hv_deletehek(hv, hek, discard) \ hv_common((hv), NULL, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek), \ - (discard)|HV_DELETE, NULL, HEK_HASH(hek)) + (discard)|HV_DELETE, NULL, 0) #endif /* This refcounted he structure is used for storing the hints used for lexical @@ -723,7 +730,7 @@ struct refcounted_he; struct refcounted_he { struct refcounted_he *refcounted_he_next; /* next entry in chain */ #ifdef USE_ITHREADS - U32 refcounted_he_hash; + /*U32 refcounted_he_hash;*/ U32 refcounted_he_keylen; #else HEK *refcounted_he_hek; /* hint key */ diff --git a/mro_core.c b/mro_core.c index 879dcbd82d5..95ac42060f0 100644 --- a/mro_core.c +++ b/mro_core.c @@ -450,7 +450,7 @@ Perl_mro_get_linear_isa(pTHX_ HV *stash) (void)hv_common(isa_hash, NULL, HEK_KEY(canon_name), HEK_LEN(canon_name), HEK_FLAGS(canon_name), HV_FETCH_ISSTORE, UNDEF, - HEK_HASH(canon_name)); + 0/*HEK_HASH(canon_name)*/); (void)hv_store(isa_hash, "UNIVERSAL", 9, UNDEF, 0); SvREADONLY_on(isa_hash); @@ -636,7 +636,7 @@ Perl_mro_isa_changed_in(pTHX_ HV* stash) assert(namehek); mro_clean_isarev( isa, HEK_KEY(namehek), HEK_LEN(namehek), - HvMROMETA(revstash)->isa, HEK_HASH(namehek), + 
HvMROMETA(revstash)->isa, 0/*HEK_HASH(namehek)*/, HEK_UTF8(namehek) ); } @@ -678,7 +678,7 @@ Perl_mro_isa_changed_in(pTHX_ HV* stash) /* Delete our name from our former parents' isarevs. */ if (isa && HvARRAY(isa)) mro_clean_isarev(isa, stashname, stashname_len, meta->isa, - HEK_HASH(stashhek), HEK_UTF8(stashhek)); + 0/*HEK_HASH(stashhek)*/, HEK_UTF8(stashhek)); } /* Deletes name from all the isarev entries listed in isa */ diff --git a/pod/perlguts.pod b/pod/perlguts.pod index 5c03ba9b6f7..0c6106dbff4 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -577,11 +577,16 @@ L for detailed descriptions of these macros. HePV(HE* he, STRLEN len) HeVAL(HE* he) - HeHASH(HE* he) HeSVKEY(HE* he) HeSVKEY_force(HE* he) HeSVKEY_set(HE* he, SV* sv) +C is not supported anymore, as the hash is now +computed and not stored in the HE. Ditto for C. +Both return 0. +C is now undefined. +C can be used to calculate the hash value from a HEK. + These two lower level macros are defined, but must only be used when dealing with keys that are not Cs: diff --git a/sv.h b/sv.h index 2ff71afe611..4ba1178dc94 100644 --- a/sv.h +++ b/sv.h @@ -1976,7 +1976,15 @@ Like C but doesn't process magic. #define SvSHARED_HEK_FROM_PV(pvx) \ ((struct hek*)(pvx - STRUCT_OFFSET(struct hek, hek_key))) -#define SvSHARED_HASH(sv) (0 + SvSHARED_HEK_FROM_PV(SvPVX_const(sv))->hek_hash) +/* This is gone. You can compute it with he _calc macros, but is mostly + not needed. 
*/ +#ifdef PERL_CORE +#define SvSHARED_HASH(sv) assert(0 && "no hek_hash") +#else +#define SvSHARED_HASH(sv) 0 +#endif +#define SvSHARED_HASH_calc(sv) \ + HEK_HASH_calc(SvSHARED_HEK_FROM_PV(SvPVX_const(sv))) /* flag values for sv_*_flags functions */ #define SV_IMMEDIATE_UNREF 1 diff --git a/universal.c b/universal.c index 3d66d448d9a..580b27e9c29 100644 --- a/universal.c +++ b/universal.c @@ -69,7 +69,7 @@ S_isa_lookup(pTHX_ HV *stash, const char * const name, STRLEN len, U32 flags) assert(canon_name); if (hv_common(isa, NULL, HEK_KEY(canon_name), HEK_LEN(canon_name), HEK_FLAGS(canon_name), - HV_FETCH_ISEXISTS, NULL, HEK_HASH(canon_name))) { + HV_FETCH_ISEXISTS, NULL, 0/*HEK_HASH(canon_name)*/)) { return TRUE; } }