Skip to content
This repository has been archived by the owner on Jun 1, 2023. It is now read-only.

Commit

Permalink
smallhash: 2nd optim, linear scan if < 7 keys (WIP)
Browse files Browse the repository at this point in the history
avoid hash calculation for a short number of keys.
calloc the first 7 words of HvARRAY. if we add one to the 6th entry
we need to split it, as the 7th, the last, is needed as NULL sentinel.

on split a small hash, we need to allocate a fresh array to move
the hashed entries to. This can be optimized furtheron. (alloc 2x)

on insert a new entry at 7th, we can avoid a split when placeholders exist.
just replace it then.

See #102

WIP: the standard operations work, but use constant fails.
Currently 13% slower.
  • Loading branch information
Reini Urban authored and rurban committed Jun 28, 2019
1 parent 178a393 commit a137ea4
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 28 deletions.
165 changes: 145 additions & 20 deletions hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,35 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
}
}

masked_flags = (flags & HVhek_MASK);

/* cache optimization: with not many keys, avoid hash
and just do a linear search. The last entry must be a sentinel NULL */
if (XHvSMALL(xhv)) {
entry = NULL;
assert(!(keysv && SvIsCOW_shared_hash(keysv) && HvSHAREKEYS(hv)));
for (oentry = &(HvARRAY(hv))[0]; *oentry; oentry++) {
CHECK_HASH_FLOOD(collisions)
/*if (!HeKEY_hek(entry))
continue;*/
/*if (HeHASH(entry) != hash)
continue;*/
if (HeKLEN(*oentry) != klen)
continue;
if (memNE(HeKEY(*oentry),key,klen)) /* is this it? */
continue;
if ((HeKFLAGS(*oentry) ^ masked_flags) & HVhek_UTF8)
continue;
entry = *oentry;
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH found small %s{%s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key));
goto found;
}
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH not found small %s{%s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key));
goto not_found;
}

if (keysv && (SvIsCOW_shared_hash(keysv))) {
if (HvSHAREKEYS(hv)) {
keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
Expand All @@ -609,8 +638,6 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
else if (!hash)
PERL_HASH(hash, key, klen);

masked_flags = (flags & HVhek_MASK);

#ifdef DYNAMIC_ENV_FETCH
if (!HvARRAY(hv)) {
entry = oentry = NULL;
Expand Down Expand Up @@ -744,7 +771,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
/* move found bucket to the front
oe -> e -> A => e -> oe -> A
oe -> A .. X -> e -> B => e -> oe -> A .. X -> B */
if (!HvEITER_get(hv) && entry != *oentry) {
if (!HvEITER_get(hv) && entry != *oentry && !XHvSMALL(xhv)) {
if (HeNEXT(*oentry) == entry) {
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH move up 1\t%s{%s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key));
Expand Down Expand Up @@ -874,7 +901,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
#endif

#ifndef PERL_PERTURB_KEYS_TOP
oentry = &HvARRAY(hv)[ HvHASH_INDEX(hash, xhv->xhv_max) ];
if (!XHvSMALL(xhv))
oentry = &HvARRAY(hv)[ HvHASH_INDEX(hash, xhv->xhv_max) ];
#endif

#if INTSIZE > 4
Expand Down Expand Up @@ -902,6 +930,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
else /* gotta do the real thing */
HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
HeVAL(entry) = val;
/*if (xhv->xhv_keys <= PERL_HV_SMALL_MAX) {
*(entry+1) = NULL;
}*/

if (SvOOK(hv)) { /* aux struct present */
#ifdef USE_SAFE_HASHITER
Expand Down Expand Up @@ -933,7 +964,14 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
{ /* Insert at the top which gives us the best performance */
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH insert top\t%s{%.*s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", (int)klen, key));
HeNEXT(entry) = *oentry; /* oe -> n: e -> oe -> n */
if (!XHvSMALL(xhv))
HeNEXT(entry) = *oentry; /* oe -> n: e -> oe -> n */
else
#if 1
assert(!HeNEXT(entry));
#else
HeNEXT(entry) = NULL;
#endif
*oentry = entry;
}
#ifdef DEBUGGING
Expand Down Expand Up @@ -967,7 +1005,9 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
HvHASKFLAGS_on(hv);

xhv->xhv_keys++;
if ( DO_HSPLIT(xhv) ) {
if ( (XHvSMALL(xhv) && xhv->xhv_keys == PERL_HV_SMALL_MAX)
|| DO_HSPLIT(xhv) )
{
const U32 oldsize = xhv->xhv_max + 1;
const U32 items = HvPLACEHOLDERS_get(hv);
DEBUG_H(PerlIO_printf(Perl_debug_log,
Expand All @@ -986,10 +1026,26 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
putting in all the placeholders (first) before turning on the
readonly flag, because Storable always pre-splits the hash.
If we're lucky, then we may clear sufficient placeholders to
avoid needing to split the hash at all. */
clear_placeholders(hv, items);
if (DO_HSPLIT(xhv))
hsplit(hv, oldsize, (U32)(oldsize * 2));
avoid needing to split the hash at all. */
if ( xhv->xhv_keys == PERL_HV_SMALL_MAX ) {
U32 i;
/* avoid split/leaving small hash, put into placeholder slot */
for (i = 0; i <= PERL_HV_SMALL_MAX; i++) {
HE **oentry = &(HvARRAY(hv)[i]);
if (HeVAL(*oentry) == &PL_sv_placeholder) {
DEBUG_H(PerlIO_printf(Perl_debug_log,
"HASH replace placeholder %s{%s} [%ld]\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key, (long)i));
*oentry = entry;
HvPLACEHOLDERS(hv)--;
break;
}
}
} else {
clear_placeholders(hv, items);
if (DO_HSPLIT(xhv))
hsplit(hv, oldsize, (U32)(oldsize * 2));
}
} else {
hsplit(hv, oldsize, (U32)(oldsize * 2));
}
Expand Down Expand Up @@ -1509,6 +1565,35 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen,
HvHASKFLAGS_on(MUTABLE_SV(hv));
}

masked_flags = (k_flags & HVhek_MASK);

/* cache optimization: with not many keys, avoid hash
and just do a linear search. It cannot be a shared hash, keysv_hek */
if (XHvSMALL(xhv)) {
entry = NULL;
assert(!(keysv && SvIsCOW_shared_hash(keysv) && HvSHAREKEYS(hv)));
for (oentry = &(HvARRAY(hv))[0]; *oentry; oentry++) {
CHECK_HASH_FLOOD(collisions)
/*if (!HeKEY_hek(entry))
continue;*/
/*if (HeHASH(entry) != hash)
continue;*/
if (HeKLEN(*oentry) != klen)
continue;
if (memNE(HeKEY(*oentry),key,klen)) /* is this it? */
continue;
if ((HeKFLAGS(*oentry) ^ masked_flags) & HVhek_UTF8)
continue;
entry = *oentry;
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH found small %s{%s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key));
goto found;
}
DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH not found small %s{%s}\n",
HvNAME_get(hv)?HvNAME_get(hv):"", key));
goto not_found;
}

if (keysv && (SvIsCOW_shared_hash(keysv))) {
if (HvSHAREKEYS(hv)) {
keysv_hek = SvSHARED_HEK_FROM_PV(SvPVX_const(keysv));
Expand Down Expand Up @@ -1810,10 +1895,9 @@ S_hsplit_move_aux(pTHX_ HV *hv, U32 const oldsize, U32 newsize)
STATIC void
S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize)
{
U32 i, newmax;
U32 i = 0, newmax;
char *a = (char*) HvARRAY(hv);
HE **aep;

bool do_aux= (
/* already have an HvAUX(hv) so we have to move it */
SvOOK(hv) ||
Expand All @@ -1825,14 +1909,14 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize)

PERL_ARGS_ASSERT_HSPLIT;

if (LIKELY(newsize > oldsize)) {
if (LIKELY(newsize > oldsize) && oldsize > 8) {
PL_nomemok = TRUE;
Renew(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
+ (do_aux ? sizeof(struct xpvhv_aux) : 0), char);
PL_nomemok = FALSE;
}
if (!a) {
return;
if (!a)
return;
HvARRAY(hv) = (HE**) a;
}

#ifdef PERL_HASH_RANDOMIZE_KEYS
Expand All @@ -1850,7 +1934,7 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize)
newmax = newsize == U32_MAX ? newsize : newsize - 1;
HvARRAY(hv) = (HE**) a;
HvMAX(hv) = newmax;
if (LIKELY(newsize > oldsize)) {
if (LIKELY(newsize > oldsize) && oldsize > 8) {
/* on grow before we zero the newly added memory, we
* need to deal with the aux struct that may be there
* or have been allocated by us */
Expand All @@ -1860,11 +1944,52 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize)
Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char);
}

if (!HvTOTALKEYS(hv)) /* skip rest if no entries */
if (!HvTOTALKEYS(hv)) { /* skip rest if no entries */
if (oldsize <= 8) { /* need to resize at least */
int n = PERL_HV_ARRAY_ALLOC_BYTES(newsize)
+ (do_aux ? sizeof(struct xpvhv_aux) : 0);
PL_nomemok = TRUE;
Renew(a, n, char);
Zero(a, n, char);
PL_nomemok = FALSE;
HvARRAY(hv) = (HE**) a;
HvMAX(hv) = newsize - 1;
}
return;
}

aep = (HE**)a;
for (i=0; i < oldsize; i++) {
if (oldsize <= 8) {
HE **newa;
char *newap;
Newxz(newap, PERL_HV_ARRAY_ALLOC_BYTES(newsize)
+ (do_aux ? sizeof(struct xpvhv_aux) : 0), char);
newa = (HE**)newap;
if (do_aux)
hsplit_move_aux(hv, oldsize, newsize);
newsize--;
for (; *aep; aep++) {
HE **entry;
const char const *key = (const char const*)HeKEY(*aep);
const I32 klen = HeKLEN(*aep);
U32 hash;
PERL_HASH(hash, key, klen);
HeHASH(*aep) = hash; /* the old hek had no hash */
entry = &newa[hash & newsize];
if (*entry) { /* insert at top */
HeNEXT(*aep) = HeNEXT(*entry);
}
*entry = *aep;
}
HvARRAY(hv) = newa;
HvMAX(hv) = newsize;
return;
}

/* now we can safely clear the second half */
Zero(&a[oldsize * sizeof(HE*)], (newsize-oldsize) * sizeof(HE*), char);
HvMAX(hv) = newmax;
for (i=0; i<oldsize; i++) {
HE **oentry = aep + i;
HE *entry = aep[i];

Expand Down Expand Up @@ -2042,7 +2167,7 @@ Perl_newHVhv(pTHX_ HV *ohv)
const bool shared = !!HvSHAREKEYS(ohv);
HE **ents, ** const oents = (HE **)HvARRAY(ohv);
char *a;
Newx(a, PERL_HV_ARRAY_ALLOC_BYTES(hv_max+1), char);
Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(hv_max+1), char);
ents = (HE**)a;

/* In each bucket... */
Expand Down
17 changes: 9 additions & 8 deletions hv.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,10 @@ C<SV*>.
#define PERL_HASH_DEFAULT_HvMAX 7

/* Small hash optimization. https://github.com/perl11/cperl/issues/102
If max 7 keys set the HvAUX_SMALL flag and just do a linear scan */
If max 6 keys set the HvAUX_SMALL flag and just do a linear scan.
The 7th key must be NULL. */

#define PERL_HV_SMALL_MAX 7
#define PERL_HV_SMALL_MAX PERL_HASH_DEFAULT_HvMAX

/* During hsplit(), if HvMAX(hv)+1 (the new bucket count) is >= this value,
* we preallocate the HvAUX() struct.
Expand Down Expand Up @@ -465,8 +466,8 @@ C<SV*>.
#define HvPLACEHOLDERS_set(hv,p) Perl_hv_placeholders_set(aTHX_ MUTABLE_HV(hv), p)

/* cperl only*/
#define HvSMALL(hv) (HvTOTALKEYS(hv) <= PERL_HV_SMALL_MAX)
#define XHvSMALL(xhv) (XHvTOTALKEYS(xhv) <= PERL_HV_SMALL_MAX)
#define HvSMALL(hv) (HvMAX(hv) < PERL_HV_SMALL_MAX)
#define XHvSMALL(xhv) ((xhv)->xhv_max < PERL_HV_SMALL_MAX)

#define HvSHAREKEYS(hv) (SvFLAGS(hv) & SVphv_SHAREKEYS)
#define HvSHAREKEYS_on(hv) (SvFLAGS(hv) |= SVphv_SHAREKEYS)
Expand Down Expand Up @@ -533,10 +534,10 @@ C<SV*>.
#ifndef PERL_CORE
# define Nullhek Null(HEK*)
#endif
#define HEK_BASESIZE STRUCT_OFFSET(HEK, hek_key[0])
#define HEK_HASH(hek) (hek)->hek_hash
#define HEK_LEN(hek) (hek)->hek_len
#define HEK_KEY(hek) (hek)->hek_key
#define HEK_BASESIZE STRUCT_OFFSET(HEK, hek_key[0])
#define HEK_HASH(hek) (hek)->hek_hash
#define HEK_LEN(hek) (hek)->hek_len
#define HEK_KEY(hek) (hek)->hek_key
#define HEK_FLAGS(hek) (*((unsigned char *)(HEK_KEY(hek))+HEK_LEN(hek)+1))
#define HEK_IS_SVKEY(hek) HEK_LEN(hek) == HEf_SVKEY

Expand Down

0 comments on commit a137ea4

Please sign in to comment.