diff options
author | Nicholas Clark <nick@ccl4.org> | 2013-03-11 11:42:32 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2013-05-29 10:52:50 +0200 |
commit | 9faf471aadb0009c0def1e2e9ab174082632af3b (patch) | |
tree | 83cb48c21bb39e29bcc82690c0f5b8caefa47d8d /hv.c | |
parent | 99f57afc36ebadbe790958225afc2a70c73dd64a (diff) | |
download | perl-9faf471aadb0009c0def1e2e9ab174082632af3b.tar.gz |
Cache HvFILL() for larger hashes, and update on insertion/deletion.
This avoids HvFILL() being O(n) for large n on large hashes, but also avoids
storing the value of HvFILL() in smaller hashes (ie a memory overhead on
every single object built using a hash.)
Diffstat (limited to 'hv.c')
-rw-r--r-- | hv.c | 82 |
1 files changed, 64 insertions, 18 deletions
@@ -36,6 +36,7 @@ holds the key and hash value. #include "perl.h" #define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */ +#define HV_FILL_THRESHOLD 31 static const char S_strtab_error[] = "Cannot modify shared string table in hv_%s"; @@ -790,6 +791,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags); HeVAL(entry) = val; + if (!*oentry && SvOOK(hv)) { + /* initial entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + ++aux->xhv_fill_lazy; + } + #ifdef PERL_HASH_RANDOMIZE_KEYS /* This logic semi-randomizes the insert order in a bucket. * Either we insert into the top, or the slot below the top, @@ -948,6 +956,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, XPVHV* xhv; HE *entry; HE **oentry; + HE *const *first_entry; bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE; int masked_flags; @@ -1023,7 +1032,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, masked_flags = (k_flags & HVhek_MASK); - oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; + first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; entry = *oentry; for (; entry; oentry = &HeNEXT(entry), entry = *oentry) { SV *sv; @@ -1111,6 +1120,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, HvPLACEHOLDERS(hv)++; else { *oentry = HeNEXT(entry); + if(!*first_entry && SvOOK(hv)) { + /* removed last entry, and aux struct present. */ + struct xpvhv_aux *const aux = HvAUX(hv); + if (aux->xhv_fill_lazy) + --aux->xhv_fill_lazy; + } if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */) HvLAZYDEL_on(hv); else { @@ -1187,6 +1202,10 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) #ifdef PERL_HASH_RANDOMIZE_KEYS dest->xhv_rand = (U32)PL_hash_rand_bits; #endif + /* For now, just reset the lazy fill counter. + It would be possible to update the counter in the code below + instead. */ + dest->xhv_fill_lazy = 0; } PL_nomemok = FALSE; @@ -1657,22 +1676,28 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp) PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY; - if (SvOOK(hv) && ((iter = HvAUX(hv))) - && ((entry = iter->xhv_eiter)) ) - { - /* the iterator may get resurrected after each - * destructor call, so check each time */ - if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ - HvLAZYDEL_off(hv); - hv_free_ent(hv, entry); - /* warning: at this point HvARRAY may have been - * re-allocated, HvMAX changed etc */ - } - iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ - iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ + if (SvOOK(hv) && ((iter = HvAUX(hv)))) { + if ((entry = iter->xhv_eiter)) { + /* the iterator may get resurrected after each + * destructor call, so check each time */ + if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */ + HvLAZYDEL_off(hv); + hv_free_ent(hv, entry); + /* warning: at this point HvARRAY may have been + * re-allocated, HvMAX changed etc */ + } + iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ + iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ #ifdef PERL_HASH_RANDOMIZE_KEYS - iter->xhv_last_rand = iter->xhv_rand; + iter->xhv_last_rand = iter->xhv_rand; #endif + } + /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone + will actually call HvFILL() on a hash under destruction, so it + seems pointless attempting to track the number of keys remaining. + But if they do, we want to reset it again. */ + if (iter->xhv_fill_lazy) + iter->xhv_fill_lazy = 0; } if (!((XPVHV*)SvANY(hv))->xhv_keys) @@ -1830,17 +1855,22 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags) Returns the number of hash buckets that happen to be in use. This function is wrapped by the macro C<HvFILL>. -Previously this value was stored in the HV structure, rather than being -calculated on demand. +Previously this value was always stored in the HV structure, which created an +overhead on every hash (and pretty much every object) for something that was +rarely used. Now we calculate it on demand the first time that it is needed, +and cache it if that calculation is going to be costly to repeat. The cached +value is updated by insertions and deletions, but (currently) discarded if +the hash is split. =cut */ STRLEN -Perl_hv_fill(pTHX_ HV const *const hv) +Perl_hv_fill(pTHX_ HV *const hv) { STRLEN count = 0; HE **ents = HvARRAY(hv); + struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL; PERL_ARGS_ASSERT_HV_FILL; @@ -1849,6 +1879,11 @@ Perl_hv_fill(pTHX_ HV const *const hv) if (HvTOTALKEYS(hv) < 2) return HvTOTALKEYS(hv); +#ifndef DEBUGGING + if (aux && aux->xhv_fill_lazy) + return aux->xhv_fill_lazy; +#endif + if (ents) { HE *const *const last = ents + HvMAX(hv); count = last + 1 - ents; @@ -1858,6 +1893,16 @@ Perl_hv_fill(pTHX_ HV const *const hv) --count; } while (++ents <= last); } + if (aux) { +#ifdef DEBUGGING + if (aux->xhv_fill_lazy) + assert(aux->xhv_fill_lazy == count); +#endif + aux->xhv_fill_lazy = count; + } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) { + aux = hv_auxinit(hv); + aux->xhv_fill_lazy = count; + } return count; } @@ -1932,6 +1977,7 @@ S_hv_auxinit(pTHX_ HV *hv) { #ifdef PERL_HASH_RANDOMIZE_KEYS iter->xhv_last_rand = iter->xhv_rand; #endif + iter->xhv_fill_lazy = 0; iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; |