diff options
-rw-r--r-- | embedvar.h | 4 | ||||
-rw-r--r-- | hv.c | 141 | ||||
-rw-r--r-- | hv.h | 26 | ||||
-rw-r--r-- | intrpvar.h | 4 | ||||
-rw-r--r-- | perl.c | 4 | ||||
-rw-r--r-- | perlapi.h | 4 | ||||
-rw-r--r-- | sv.c | 1 | ||||
-rw-r--r-- | sv.h | 1 | ||||
-rw-r--r-- | util.c | 2 |
9 files changed, 174 insertions, 13 deletions
diff --git a/embedvar.h b/embedvar.h index a4f4a929bf..f58fae9e98 100644 --- a/embedvar.h +++ b/embedvar.h @@ -320,6 +320,8 @@ #define PL_multi_open (vTHX->Imulti_open) #define PL_multi_start (vTHX->Imulti_start) #define PL_multiline (vTHX->Imultiline) +#define PL_new_hash_seed (vTHX->Inew_hash_seed) +#define PL_new_hash_seed_set (vTHX->Inew_hash_seed_set) #define PL_nexttoke (vTHX->Inexttoke) #define PL_nexttype (vTHX->Inexttype) #define PL_nextval (vTHX->Inextval) @@ -624,6 +626,8 @@ #define PL_Imulti_open PL_multi_open #define PL_Imulti_start PL_multi_start #define PL_Imultiline PL_multiline +#define PL_Inew_hash_seed PL_new_hash_seed +#define PL_Inew_hash_seed_set PL_new_hash_seed_set #define PL_Inexttoke PL_nexttoke #define PL_Inexttype PL_nexttype #define PL_Inextval PL_nextval @@ -274,7 +274,11 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) } } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -445,7 +449,9 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash) flags |= HVhek_WASUTF8 | HVhek_FREEKEY; } - if (!hash) { + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { if SvIsCOW_shared_hash(keysv) { hash = SvUVX(keysv); } else { @@ -628,7 +634,12 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, if (flags) HvHASKFLAGS_on((SV*)hv); - if (!hash) + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); if (!xhv->xhv_array /* !HvARRAY(hv) */) @@ -798,7 +809,12 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) HvHASKFLAGS_on((SV*)hv); } - if (!hash) { + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { if SvIsCOW_shared_hash(keysv) { hash = SvUVX(keysv); } else { @@ -950,7 +966,11 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1107,8 +1127,11 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1255,7 +1278,11 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } #ifdef DYNAMIC_ENV_FETCH if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*); @@ -1359,7 +1386,9 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash) if (key != keysave) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); #ifdef DYNAMIC_ENV_FETCH @@ -1415,6 +1444,8 @@ S_hsplit(pTHX_ HV *hv) register HE **bep; register HE *entry; register HE **oentry; + int longest_chain = 0; + int was_shared; PL_nomemok = TRUE; #if defined(STRANGE_MALLOC) || defined(MYMALLOC) @@ -1445,6 +1476,9 @@ S_hsplit(pTHX_ HV *hv) aep = (HE**)a; for (i=0; i<oldsize; i++,aep++) { + int left_length = 0; + int right_length = 0; + if (!*aep) /* non-existent */ continue; bep = aep+oldsize; @@ -1455,14 +1489,90 @@ S_hsplit(pTHX_ HV *hv) if (!*bep) xhv->xhv_fill++; /* HvFILL(hv)++ */ *bep = entry; + right_length++; continue; } - else + else { oentry = &HeNEXT(entry); + left_length++; + } } if (!*aep) /* everything moved */ xhv->xhv_fill--; /* HvFILL(hv)-- */ + /* I think we don't actually need to keep track of the longest length, + merely flag if anything is too long. But for the moment while + developing this code I'll track it. */ + if (left_length > longest_chain) + longest_chain = left_length; + if (right_length > longest_chain) + longest_chain = right_length; + } + + + /* Pick your policy for "hashing isn't working" here: */ + if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv) + || HvREHASH(hv)) { + return; + } + + if (hv == PL_strtab) { + /* Urg. Someone is doing something nasty to the string table. + Can't win. */ + return; + } + + /* Awooga. Awooga. Pathological data. */ + /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n", + longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ + + ++newsize; + Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + was_shared = HvSHAREKEYS(hv); + + xhv->xhv_fill = 0; + HvSHAREKEYS_off(hv); + HvREHASH_on(hv); + + aep = (HE **) xhv->xhv_array; + + for (i=0; i<newsize; i++,aep++) { + entry = *aep; + while (entry) { + /* We're going to trash this HE's next pointer when we chain it + into the new hash below, so store where we go next. */ + HE *next = HeNEXT(entry); + UV hash; + + /* Rehash it */ + PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry)); + + if (was_shared) { + /* Unshare it. */ + HEK *new_hek + = save_hek_flags(HeKEY(entry), HeKLEN(entry), + hash, HeKFLAGS(entry)); + unshare_hek (HeKEY_hek(entry)); + HeKEY_hek(entry) = new_hek; + } else { + /* Not shared, so simply write the new hash in. */ + HeHASH(entry) = hash; + } + /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/ + HEK_REHASH_on(HeKEY_hek(entry)); + /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/ + + /* Copy oentry to the correct new chain. */ + bep = ((HE**)a) + (hash & (I32) xhv->xhv_max); + if (!*bep) + xhv->xhv_fill++; /* HvFILL(hv)++ */ + HeNEXT(entry) = *bep; + *bep = entry; + + entry = next; + } } + Safefree (xhv->xhv_array); + xhv->xhv_array = a; /* HvARRAY(hv) = a */ } void @@ -1566,6 +1676,7 @@ Perl_newHV(pTHX) #ifndef NODEFAULT_SHAREKEYS HvSHAREKEYS_on(hv); /* key-sharing on by default */ #endif + xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */ xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */ xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */ @@ -2039,7 +2150,17 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry) sv = newSVpvn ((char*)as_utf8, utf8_len); SvUTF8_on (sv); Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */ - } else { + } else if (flags & HVhek_REHASH) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK. This hv is using custom a hasing + algorithm. Hence we can't return a shared string scalar, as + that would contain the (wrong) hash value, and might get passed + into an hv routine with a regular hash */ + + sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); + if (HEK_UTF8(hek)) + SvUTF8_on (sv); + } else { sv = newSVpvn_share(HEK_KEY(hek), (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)), HEK_HASH(hek)); @@ -89,6 +89,24 @@ struct xpvhv { (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ } STMT_END +#ifdef PERL_IN_HV_C +#define PERL_HASH_INTERNAL(hash,str,len) \ + STMT_START { \ + register const char *s_PeRlHaSh_tmp = str; \ + register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \ + register I32 i_PeRlHaSh = len; \ + register U32 hash_PeRlHaSh = PL_new_hash_seed; \ + while (i_PeRlHaSh--) { \ + hash_PeRlHaSh += *s_PeRlHaSh++; \ + hash_PeRlHaSh += (hash_PeRlHaSh << 10); \ + hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \ + } \ + hash_PeRlHaSh += (hash_PeRlHaSh << 3); \ + hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \ + (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \ + } STMT_END +#endif + /* =head1 Hash Manipulation Functions @@ -203,6 +221,10 @@ C<SV*>. #define HvLAZYDEL_on(hv) (SvFLAGS(hv) |= SVphv_LAZYDEL) #define HvLAZYDEL_off(hv) (SvFLAGS(hv) &= ~SVphv_LAZYDEL) +#define HvREHASH(hv) (SvFLAGS(hv) & SVphv_REHASH) +#define HvREHASH_on(hv) (SvFLAGS(hv) |= SVphv_REHASH) +#define HvREHASH_off(hv) (SvFLAGS(hv) &= ~SVphv_REHASH) + /* Maybe amagical: */ /* #define HV_AMAGICmb(hv) (SvFLAGS(hv) & (SVpgv_badAM | SVpgv_AM)) */ @@ -224,6 +246,7 @@ C<SV*>. #define HeKLEN(he) HEK_LEN(HeKEY_hek(he)) #define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he)) #define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he)) +#define HeKREHASH(he) HEK_REHASH(HeKEY_hek(he)) #define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he)) #define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he)) #define HeVAL(he) (he)->hent_val @@ -254,6 +277,7 @@ C<SV*>. #define HVhek_UTF8 0x01 /* Key is utf8 encoded. */ #define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */ +#define HVhek_REHASH 0x04 /* This key is in an hv using a custom HASH . */ #define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */ #define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder. * (may change, but Storable is a core module) */ @@ -265,6 +289,8 @@ C<SV*>. #define HEK_WASUTF8(hek) (HEK_FLAGS(hek) & HVhek_WASUTF8) #define HEK_WASUTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_WASUTF8) #define HEK_WASUTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_WASUTF8) +#define HEK_REHASH(hek) (HEK_FLAGS(hek) & HVhek_REHASH) +#define HEK_REHASH_on(hek) (HEK_FLAGS(hek) |= HVhek_REHASH) /* calculate HV array allocation */ #if defined(STRANGE_MALLOC) || defined(MYMALLOC) diff --git a/intrpvar.h b/intrpvar.h index 6a34ea4e67..09709ea123 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -531,6 +531,10 @@ PERLVAR(IDBassertion, SV *) PERLVARI(Icv_has_eval, I32, 0) /* PL_compcv includes an entereval or similar */ +PERLVARI(Inew_hash_seed, UV, 0) /* 582 hash initializer */ + +PERLVARI(Inew_hash_seed_set, bool, FALSE) /* 582 hash initialized? */ + /* New variables must be added to the very end, before this comment, * for binary compatibility (the offsets of the old members must not change). * (Don't forget to add your variable also to perl_clone()!) @@ -918,7 +918,7 @@ setuid perl scripts securely.\n"); * it is your responsibility to provide a good random seed! * You can also define PERL_HASH_SEED in compile time, see hv.h. */ if (!PL_hash_seed_set) - PL_hash_seed = get_hash_seed(); + PL_new_hash_seed = get_hash_seed(); { char *s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG"); @@ -927,7 +927,7 @@ setuid perl scripts securely.\n"); if (i == 1) PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n", - PL_hash_seed); + PL_new_hash_seed); } } #endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */ @@ -398,6 +398,10 @@ END_EXTERN_C #define PL_multi_start (*Perl_Imulti_start_ptr(aTHX)) #undef PL_multiline #define PL_multiline (*Perl_Imultiline_ptr(aTHX)) +#undef PL_new_hash_seed +#define PL_new_hash_seed (*Perl_Inew_hash_seed_ptr(aTHX)) +#undef PL_new_hash_seed_set +#define PL_new_hash_seed_set (*Perl_Inew_hash_seed_set_ptr(aTHX)) #undef PL_nexttoke #define PL_nexttoke (*Perl_Inexttoke_ptr(aTHX)) #undef PL_nexttype @@ -11349,6 +11349,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, PL_glob_index = proto_perl->Iglob_index; PL_srand_called = proto_perl->Isrand_called; PL_hash_seed = proto_perl->Ihash_seed; + PL_new_hash_seed = proto_perl->Inew_hash_seed; PL_uudmap['M'] = 0; /* reinits on demand */ PL_bitcount = Nullch; /* reinits on demand */ @@ -213,6 +213,7 @@ perform the upgrade if necessary. See C<svtype>. #define SVrepl_EVAL 0x40000000 /* Replacement part of s///e */ +#define SVphv_REHASH 0x10000000 /* HV is recalculating hash values */ #define SVphv_SHAREKEYS 0x20000000 /* keys live on shared string table */ #define SVphv_LAZYDEL 0x40000000 /* entry in xhv_eiter must be deleted */ #define SVphv_HASKFLAGS 0x80000000 /* keys have flag byte after hash */ @@ -4427,7 +4427,7 @@ Perl_get_hash_seed(pTHX) Perl_croak(aTHX_ "Your random numbers are not that random"); } } - PL_hash_seed_set = TRUE; + PL_new_hash_seed_set = TRUE; return myseed; } |