path: root/hv.c
author    Nicholas Clark <nick@ccl4.org>    2013-03-11 11:42:32 +0000
committer Nicholas Clark <nick@ccl4.org>    2013-05-29 10:52:50 +0200
commit    9faf471aadb0009c0def1e2e9ab174082632af3b (patch)
tree      83cb48c21bb39e29bcc82690c0f5b8caefa47d8d /hv.c
parent    99f57afc36ebadbe790958225afc2a70c73dd64a (diff)
download  perl-9faf471aadb0009c0def1e2e9ab174082632af3b.tar.gz
Cache HvFILL() for larger hashes, and update on insertion/deletion.
This avoids HvFILL() being O(n) for large n on large hashes, while also avoiding storing the value of HvFILL() in smaller hashes (i.e. a memory overhead on every single object built using a hash).
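As a rough illustration of the scheme the commit message describes, here is a minimal standalone sketch in C. It is not Perl's real data structures: every name (toy_hash, toy_fill, TOY_FILL_THRESHOLD, and so on) is hypothetical. The number of used buckets is counted on demand, cached only once the table is large enough for the recount to be costly, and nudged up or down as entries land in empty buckets or leave buckets that become empty.

/* Hypothetical sketch of lazy bucket-fill caching; not Perl's hv.c. */
#include <stdlib.h>
#include <string.h>

#define TOY_FILL_THRESHOLD 31   /* cache only when max >= this */

struct toy_entry { const char *key; struct toy_entry *next; };

struct toy_hash {
    struct toy_entry **buckets;
    size_t max;         /* bucket count - 1 (a power of two - 1), like HvMAX() */
    size_t fill_lazy;   /* 0 means "not cached", like xhv_fill_lazy */
};

static size_t toy_bucket(const struct toy_hash *h, const char *key) {
    size_t hash = 5381;                     /* djb2; stand-in for PERL_HASH */
    while (*key)
        hash = hash * 33 + (unsigned char)*key++;
    return hash & h->max;
}

/* HvFILL() analogue: count used buckets, caching only for larger tables. */
static size_t toy_fill(struct toy_hash *h) {
    size_t count = 0, i;
    if (h->fill_lazy)
        return h->fill_lazy;                /* cheap path once cached */
    for (i = 0; i <= h->max; i++)
        if (h->buckets[i])
            ++count;
    if (h->max >= TOY_FILL_THRESHOLD)
        h->fill_lazy = count;               /* large enough to be worth caching */
    return count;
}

static void toy_insert(struct toy_hash *h, const char *key) {
    size_t i = toy_bucket(h, key);
    struct toy_entry *e = malloc(sizeof *e);
    e->key = key;
    e->next = h->buckets[i];
    if (!e->next && h->fill_lazy)
        ++h->fill_lazy;                     /* an empty bucket just became used */
    h->buckets[i] = e;
}

static void toy_delete(struct toy_hash *h, const char *key) {
    size_t i = toy_bucket(h, key);
    struct toy_entry **oentry = &h->buckets[i];
    while (*oentry && strcmp((*oentry)->key, key) != 0)
        oentry = &(*oentry)->next;
    if (*oentry) {
        struct toy_entry *e = *oentry;
        *oentry = e->next;
        free(e);
        if (!h->buckets[i] && h->fill_lazy)
            --h->fill_lazy;                 /* the bucket just became empty */
    }
}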
Diffstat (limited to 'hv.c')
-rw-r--r--    hv.c    82
1 file changed, 64 insertions, 18 deletions
diff --git a/hv.c b/hv.c
index 916b64bec5..cbeed30faa 100644
--- a/hv.c
+++ b/hv.c
@@ -36,6 +36,7 @@ holds the key and hash value.
#include "perl.h"
#define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
+#define HV_FILL_THRESHOLD 31
static const char S_strtab_error[]
= "Cannot modify shared string table in hv_%s";
@@ -790,6 +791,13 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
HeVAL(entry) = val;
+ if (!*oentry && SvOOK(hv)) {
+ /* initial entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ ++aux->xhv_fill_lazy;
+ }
+
#ifdef PERL_HASH_RANDOMIZE_KEYS
/* This logic semi-randomizes the insert order in a bucket.
* Either we insert into the top, or the slot below the top,
@@ -948,6 +956,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
XPVHV* xhv;
HE *entry;
HE **oentry;
+ HE *const *first_entry;
bool is_utf8 = (k_flags & HVhek_UTF8) ? TRUE : FALSE;
int masked_flags;
@@ -1023,7 +1032,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
masked_flags = (k_flags & HVhek_MASK);
- oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
+ first_entry = oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)];
entry = *oentry;
for (; entry; oentry = &HeNEXT(entry), entry = *oentry) {
SV *sv;
@@ -1111,6 +1120,12 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HvPLACEHOLDERS(hv)++;
else {
*oentry = HeNEXT(entry);
+ if(!*first_entry && SvOOK(hv)) {
+ /* removed last entry, and aux struct present. */
+ struct xpvhv_aux *const aux = HvAUX(hv);
+ if (aux->xhv_fill_lazy)
+ --aux->xhv_fill_lazy;
+ }
if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */)
HvLAZYDEL_on(hv);
else {
@@ -1187,6 +1202,10 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
#ifdef PERL_HASH_RANDOMIZE_KEYS
dest->xhv_rand = (U32)PL_hash_rand_bits;
#endif
+ /* For now, just reset the lazy fill counter.
+ It would be possible to update the counter in the code below
+ instead. */
+ dest->xhv_fill_lazy = 0;
}
PL_nomemok = FALSE;
@@ -1657,22 +1676,28 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
PERL_ARGS_ASSERT_HFREE_NEXT_ENTRY;
- if (SvOOK(hv) && ((iter = HvAUX(hv)))
- && ((entry = iter->xhv_eiter)) )
- {
- /* the iterator may get resurrected after each
- * destructor call, so check each time */
- if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
- HvLAZYDEL_off(hv);
- hv_free_ent(hv, entry);
- /* warning: at this point HvARRAY may have been
- * re-allocated, HvMAX changed etc */
- }
- iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
- iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
+ if (SvOOK(hv) && ((iter = HvAUX(hv)))) {
+ if ((entry = iter->xhv_eiter)) {
+ /* the iterator may get resurrected after each
+ * destructor call, so check each time */
+ if (entry && HvLAZYDEL(hv)) { /* was deleted earlier? */
+ HvLAZYDEL_off(hv);
+ hv_free_ent(hv, entry);
+ /* warning: at this point HvARRAY may have been
+ * re-allocated, HvMAX changed etc */
+ }
+ iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
+ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */
#ifdef PERL_HASH_RANDOMIZE_KEYS
- iter->xhv_last_rand = iter->xhv_rand;
+ iter->xhv_last_rand = iter->xhv_rand;
#endif
+ }
+ /* Reset any cached HvFILL() to "unknown". It's unlikely that anyone
+ will actually call HvFILL() on a hash under destruction, so it
+ seems pointless attempting to track the number of keys remaining.
+ But if they do, we want to reset it again. */
+ if (iter->xhv_fill_lazy)
+ iter->xhv_fill_lazy = 0;
}
if (!((XPVHV*)SvANY(hv))->xhv_keys)
@@ -1830,17 +1855,22 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
Returns the number of hash buckets that happen to be in use. This function is
wrapped by the macro C<HvFILL>.
-Previously this value was stored in the HV structure, rather than being
-calculated on demand.
+Previously this value was always stored in the HV structure, which created an
+overhead on every hash (and pretty much every object) for something that was
+rarely used. Now we calculate it on demand the first time that it is needed,
+and cache it if that calculation is going to be costly to repeat. The cached
+value is updated by insertions and deletions, but (currently) discarded if
+the hash is split.
=cut
*/
STRLEN
-Perl_hv_fill(pTHX_ HV const *const hv)
+Perl_hv_fill(pTHX_ HV *const hv)
{
STRLEN count = 0;
HE **ents = HvARRAY(hv);
+ struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL;
PERL_ARGS_ASSERT_HV_FILL;
@@ -1849,6 +1879,11 @@ Perl_hv_fill(pTHX_ HV const *const hv)
if (HvTOTALKEYS(hv) < 2)
return HvTOTALKEYS(hv);
+#ifndef DEBUGGING
+ if (aux && aux->xhv_fill_lazy)
+ return aux->xhv_fill_lazy;
+#endif
+
if (ents) {
HE *const *const last = ents + HvMAX(hv);
count = last + 1 - ents;
@@ -1858,6 +1893,16 @@ Perl_hv_fill(pTHX_ HV const *const hv)
--count;
} while (++ents <= last);
}
+ if (aux) {
+#ifdef DEBUGGING
+ if (aux->xhv_fill_lazy)
+ assert(aux->xhv_fill_lazy == count);
+#endif
+ aux->xhv_fill_lazy = count;
+ } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) {
+ aux = hv_auxinit(hv);
+ aux->xhv_fill_lazy = count;
+ }
return count;
}
@@ -1932,6 +1977,7 @@ S_hv_auxinit(pTHX_ HV *hv) {
#ifdef PERL_HASH_RANDOMIZE_KEYS
iter->xhv_last_rand = iter->xhv_rand;
#endif
+ iter->xhv_fill_lazy = 0;
iter->xhv_name_u.xhvnameu_name = 0;
iter->xhv_name_count = 0;
iter->xhv_backreferences = 0;