diff --git a/hv.c b/hv.c index 7cf0a4c5b20..39132d5f3eb 100644 --- a/hv.c +++ b/hv.c @@ -590,6 +590,35 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, } } + masked_flags = (flags & HVhek_MASK); + + /* cache optimization: with not many keys, avoid hash + and just do a linear search. The last entry must be a sentinel NULL */ + if (XHvSMALL(xhv)) { + entry = NULL; + assert(!(keysv && SvIsCOW_shared_hash(keysv) && HvSHAREKEYS(hv))); + for (oentry = &(HvARRAY(hv))[0]; *oentry; oentry++) { + CHECK_HASH_FLOOD(collisions) + /*if (!HeKEY_hek(entry)) + continue;*/ + /*if (HeHASH(entry) != hash) + continue;*/ + if (HeKLEN(*oentry) != klen) + continue; + if (memNE(HeKEY(*oentry),key,klen)) /* is this it? */ + continue; + if ((HeKFLAGS(*oentry) ^ masked_flags) & HVhek_UTF8) + continue; + entry = *oentry; + DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH found small %s{%s}\n", + HvNAME_get(hv)?HvNAME_get(hv):"", key)); + goto found; + } + DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH not found small %s{%s}\n", + HvNAME_get(hv)?HvNAME_get(hv):"", key)); + goto not_found; + } + if (keysv && (SvIsCOW_shared_hash(keysv))) { if (HvSHAREKEYS(hv)) { keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); @@ -609,8 +638,6 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, else if (!hash) PERL_HASH(hash, key, klen); - masked_flags = (flags & HVhek_MASK); - #ifdef DYNAMIC_ENV_FETCH if (!HvARRAY(hv)) { entry = oentry = NULL; @@ -744,7 +771,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, /* move found bucket to the front oe -> e -> A => e -> oe -> A oe -> A .. X -> e -> B => e -> oe -> A .. X -> B */ - if (!HvEITER_get(hv) && entry != *oentry) { + if (!HvEITER_get(hv) && entry != *oentry && !XHvSMALL(xhv)) { if (HeNEXT(*oentry) == entry) { DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH move up 1\t%s{%s}\n", HvNAME_get(hv)?HvNAME_get(hv):"", key)); @@ -874,7 +901,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, #endif #ifndef PERL_PERTURB_KEYS_TOP - oentry = &HvARRAY(hv)[ HvHASH_INDEX(hash, xhv->xhv_max) ]; + if (!XHvSMALL(xhv)) + oentry = &HvARRAY(hv)[ HvHASH_INDEX(hash, xhv->xhv_max) ]; #endif #if INTSIZE > 4 @@ -902,6 +930,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, else /* gotta do the real thing */ HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; + /*if (xhv->xhv_keys <= PERL_HV_SMALL_MAX) { + *(entry+1) = NULL; + }*/ if (SvOOK(hv)) { /* aux struct present */ #ifdef USE_SAFE_HASHITER @@ -933,7 +964,14 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, { /* Insert at the top which gives us the best performance */ DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH insert top\t%s{%.*s}\n", HvNAME_get(hv)?HvNAME_get(hv):"", (int)klen, key)); - HeNEXT(entry) = *oentry; /* oe -> n: e -> oe -> n */ + if (!XHvSMALL(xhv)) + HeNEXT(entry) = *oentry; /* oe -> n: e -> oe -> n */ + else +#if 1 + assert(!HeNEXT(entry)); +#else + HeNEXT(entry) = NULL; +#endif *oentry = entry; } #ifdef DEBUGGING @@ -967,7 +1005,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, HvHASKFLAGS_on(hv); xhv->xhv_keys++; - if ( DO_HSPLIT(xhv) ) { + if ( (XHvSMALL(xhv) && xhv->xhv_keys == PERL_HV_SMALL_MAX) + || DO_HSPLIT(xhv) ) + { const U32 oldsize = xhv->xhv_max + 1; const U32 items = HvPLACEHOLDERS_get(hv); DEBUG_H(PerlIO_printf(Perl_debug_log, @@ -986,10 +1026,26 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, putting in all the placeholders (first) before turning on the readonly flag, because Storable always pre-splits the hash. If we're lucky, then we may clear sufficient placeholders to - avoid needing to split the hash at all. */ - clear_placeholders(hv, items); - if (DO_HSPLIT(xhv)) - hsplit(hv, oldsize, (U32)(oldsize * 2)); + avoid needing to split the hash at all. */ + if ( xhv->xhv_keys == PERL_HV_SMALL_MAX ) { + U32 i; + /* avoid split/leaving small hash, put into placeholder slot */ + for (i = 0; i <= PERL_HV_SMALL_MAX; i++) { + HE **oentry = &(HvARRAY(hv)[i]); + if (HeVAL(*oentry) == &PL_sv_placeholder) { + DEBUG_H(PerlIO_printf(Perl_debug_log, + "HASH replace placeholder %s{%s} [%ld]\n", + HvNAME_get(hv)?HvNAME_get(hv):"", key, (long)i)); + *oentry = entry; + HvPLACEHOLDERS(hv)--; + break; + } + } + } else { + clear_placeholders(hv, items); + if (DO_HSPLIT(xhv)) + hsplit(hv, oldsize, (U32)(oldsize * 2)); + } } else { hsplit(hv, oldsize, (U32)(oldsize * 2)); } @@ -1509,6 +1565,35 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, HvHASKFLAGS_on(MUTABLE_SV(hv)); } + masked_flags = (k_flags & HVhek_MASK); + + /* cache optimization: with not many keys, avoid hash + and just do a linear search. It cannot be a shared hash, keysv_hek */ + if (XHvSMALL(xhv)) { + entry = NULL; + assert(!(keysv && SvIsCOW_shared_hash(keysv) && HvSHAREKEYS(hv))); + for (oentry = &(HvARRAY(hv))[0]; *oentry; oentry++) { + CHECK_HASH_FLOOD(collisions) + /*if (!HeKEY_hek(entry)) + continue;*/ + /*if (HeHASH(entry) != hash) + continue;*/ + if (HeKLEN(*oentry) != klen) + continue; + if (memNE(HeKEY(*oentry),key,klen)) /* is this it? */ + continue; + if ((HeKFLAGS(*oentry) ^ masked_flags) & HVhek_UTF8) + continue; + entry = *oentry; + DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH found small %s{%s}\n", + HvNAME_get(hv)?HvNAME_get(hv):"", key)); + goto found; + } + DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH not found small %s{%s}\n", + HvNAME_get(hv)?HvNAME_get(hv):"", key)); + goto not_found; + } + if (keysv && (SvIsCOW_shared_hash(keysv))) { if (HvSHAREKEYS(hv)) { keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv)); @@ -1810,10 +1895,9 @@ S_hsplit_move_aux(pTHX_ HV *hv, U32 const oldsize, U32 newsize) STATIC void S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) { - U32 i, newmax; + U32 i = 0, newmax; char *a = (char*) HvARRAY(hv); HE **aep; - bool do_aux= ( /* already have an HvAUX(hv) so we have to move it */ SvOOK(hv) || @@ -1825,14 +1909,14 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) PERL_ARGS_ASSERT_HSPLIT; - if (LIKELY(newsize > oldsize)) { + if (LIKELY(newsize > oldsize) && oldsize > 8) { PL_nomemok = TRUE; Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize) + (do_aux ? sizeof(struct xpvhv_aux) : 0), char); PL_nomemok = FALSE; - } - if (!a) { - return; + if (!a) + return; + HvARRAY(hv) = (HE**) a; } #ifdef PERL_HASH_RANDOMIZE_KEYS @@ -1850,7 +1934,7 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) newmax = newsize == U32_MAX ? newsize : newsize - 1; HvARRAY(hv) = (HE**) a; HvMAX(hv) = newmax; - if (LIKELY(newsize > oldsize)) { + if (LIKELY(newsize > oldsize) && oldsize > 8) { /* on grow before we zero the newly added memory, we * need to deal with the aux struct that may be there * or have been allocated by us */ @@ -1860,11 +1944,52 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); } - if (!HvTOTALKEYS(hv)) /* skip rest if no entries */ + if (!HvTOTALKEYS(hv)) { /* skip rest if no entries */ + if (oldsize <= 8) { /* need to resize at least */ + int n = PERL_HV_ARRAY_ALLOC_BYTES(newsize) + + (do_aux ? sizeof(struct xpvhv_aux) : 0); + PL_nomemok = TRUE; + Renew(a, n, char); + Zero(a, n, char); + PL_nomemok = FALSE; + HvARRAY(hv) = (HE**) a; + HvMAX(hv) = newsize - 1; + } return; + } aep = (HE**)a; - for (i=0; i < oldsize; i++) { + if (oldsize <= 8) { + HE **newa; + char *newap; + Newxz(newap, PERL_HV_ARRAY_ALLOC_BYTES(newsize) + + (do_aux ? sizeof(struct xpvhv_aux) : 0), char); + newa = (HE**)newap; + if (do_aux) + hsplit_move_aux(hv, oldsize, newsize); + newsize--; + for (; *aep; aep++) { + HE **entry; + const char const *key = (const char const*)HeKEY(*aep); + const I32 klen = HeKLEN(*aep); + U32 hash; + PERL_HASH(hash, key, klen); + HeHASH(*aep) = hash; /* the old hek had no hash */ + entry = &newa[hash & newsize]; + if (*entry) { /* insert at top */ + HeNEXT(*aep) = HeNEXT(*entry); + } + *entry = *aep; + } + HvARRAY(hv) = newa; + HvMAX(hv) = newsize; + return; + } + + /* now we can safely clear the second half */ + Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char); + HvMAX(hv) = newmax; + for (i=0; i. #define PERL_HASH_DEFAULT_HvMAX 7 /* Small hash optimization. https://github.com/perl11/cperl/issues/102 - If max 7 keys set the HvAUX_SMALL flag and just do a linear scan */ + If max 6 keys set the HvAUX_SMALL flag and just do a linear scan. + The 7th key must be NULL. */ -#define PERL_HV_SMALL_MAX 7 +#define PERL_HV_SMALL_MAX PERL_HASH_DEFAULT_HvMAX /* During hsplit(), if HvMAX(hv)+1 (the new bucket count) is >= this value, * we preallocate the HvAUX() struct. @@ -465,8 +466,8 @@ C. #define HvPLACEHOLDERS_set(hv,p) Perl_hv_placeholders_set(aTHX_ MUTABLE_HV(hv), p) /* cperl only*/ -#define HvSMALL(hv) (HvTOTALKEYS(hv) <= PERL_HV_SMALL_MAX) -#define XHvSMALL(xhv) (XHvTOTALKEYS(xhv) <= PERL_HV_SMALL_MAX) +#define HvSMALL(hv) (HvMAX(hv) < PERL_HV_SMALL_MAX) +#define XHvSMALL(xhv) ((xhv)->xhv_max < PERL_HV_SMALL_MAX) #define HvSHAREKEYS(hv) (SvFLAGS(hv) & SVphv_SHAREKEYS) #define HvSHAREKEYS_on(hv) (SvFLAGS(hv) |= SVphv_SHAREKEYS) @@ -533,10 +534,10 @@ C. #ifndef PERL_CORE # define Nullhek Null(HEK*) #endif -#define HEK_BASESIZE STRUCT_OFFSET(HEK, hek_key[0]) -#define HEK_HASH(hek) (hek)->hek_hash -#define HEK_LEN(hek) (hek)->hek_len -#define HEK_KEY(hek) (hek)->hek_key +#define HEK_BASESIZE STRUCT_OFFSET(HEK, hek_key[0]) +#define HEK_HASH(hek) (hek)->hek_hash +#define HEK_LEN(hek) (hek)->hek_len +#define HEK_KEY(hek) (hek)->hek_key #define HEK_FLAGS(hek) (*((unsigned char *)(HEK_KEY(hek))+HEK_LEN(hek)+1)) #define HEK_IS_SVKEY(hek) HEK_LEN(hek) == HEf_SVKEY