diff options
author | Nicholas Clark <nick@ccl4.org> | 2003-10-17 21:09:13 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2003-10-17 21:09:13 +0000 |
commit | ff38041cde2db50f5e2bfa2ce8e9d9671e852881 (patch) | |
tree | 17bf3079c4040650e945cb165068c5f3bd95a105 /hv.c | |
parent | 183b68026dff6239890149e06dcb4282fca40f73 (diff) | |
download | perl-ff38041cde2db50f5e2bfa2ce8e9d9671e852881.tar.gz |
Integrate:
[ 21446]
Duplicate 19423 (pathological hashes too easy) into hv_store_ent
(the routine used by perl level HV operations)
[ 21469]
Duplicate 19423 (pathological hashes too easy) into share_hek_flags
(as suggested by Jan Dubois)
[ 21471]
Plan C for foiling the algorithmic complexity attack
(based on Chip's plan A (binary compatibility with 5.8.0 and 5.8.1),
Chip's plan B (do something new inside the hv functions)
and introspective sort)
Provides infrastructure for hashes to change their hash function
if necessary, and code in hsplit to detect pathalogical data and
instigate a random rehashing.
Needs refinement. Let's see how much smoke it creates.
[ 21474]
Plan C rough edge smoothing - forgot to turn on the "has key flags"
flag on the hash when rehashing. Can turn off the "rehasing" flag
if the hash is cleared
p4raw-link: @21474 on //depot/perl: bb443f97c2b5dfbb53285f377a01d882f53de1c7
p4raw-link: @21471 on //depot/perl: 4b5190b5321b9b9e2ec46674b256120d4fdab72a
p4raw-link: @21469 on //depot/perl: 4c9cc5953a3992eecff824aeaacb5b7670e2db46
p4raw-link: @21446 on //depot/perl: 5355f3c7126474078b6e199097ac1d1343f2fdb1
p4raw-id: //depot/maint-5.8/perl@21475
p4raw-integrated: from //depot/perl@21471 'merge in' perl.c (@21470..)
p4raw-edited: from //depot/perl@21469 'edit in' hv.c (@21446..)
p4raw-integrated: from //depot/perl@21395 'copy in' hv.h (@21198..)
'merge in' intrpvar.h (@20263..) embedvar.h perlapi.h
(@20565..) util.c (@21397..) sv.c (@21420..) sv.h (@21468..)
Diffstat (limited to 'hv.c')
-rw-r--r-- | hv.c | 151 |
1 files changed, 138 insertions, 13 deletions
@@ -274,7 +274,11 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) } } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -443,8 +447,11 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash) flags |= HVhek_WASUTF8 | HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { PERL_HASH(hash, key, klen); + } /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -621,7 +628,12 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, if (flags) HvHASKFLAGS_on((SV*)hv); - if (!hash) + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); if (!xhv->xhv_array /* !HvARRAY(hv) */) @@ -791,8 +803,14 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) HvHASKFLAGS_on((SV*)hv); } - if (!hash) + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { PERL_HASH(hash, key, klen); + } if (!xhv->xhv_array /* !HvARRAY(hv) */) Newz(505, xhv->xhv_array /* HvARRAY(hv) */, @@ -859,8 +877,8 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) xhv->xhv_keys++; /* HvKEYS(hv)++ */ if (i) { /* initial entry? */ xhv->xhv_fill++; /* HvFILL(hv)++ */ - if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) - hsplit(hv); + } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) { + hsplit(hv); } return entry; @@ -938,7 +956,11 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1095,8 +1117,11 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1243,7 +1268,11 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } #ifdef DYNAMIC_ENV_FETCH if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*); @@ -1347,7 +1376,9 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash) if (key != keysave) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); #ifdef DYNAMIC_ENV_FETCH @@ -1403,6 +1434,8 @@ S_hsplit(pTHX_ HV *hv) register HE **bep; register HE *entry; register HE **oentry; + int longest_chain = 0; + int was_shared; PL_nomemok = TRUE; #if defined(STRANGE_MALLOC) || defined(MYMALLOC) @@ -1433,6 +1466,9 @@ S_hsplit(pTHX_ HV *hv) aep = (HE**)a; for (i=0; i<oldsize; i++,aep++) { + int left_length = 0; + int right_length = 0; + if (!*aep) /* non-existent */ continue; bep = aep+oldsize; @@ -1443,14 +1479,91 @@ S_hsplit(pTHX_ HV *hv) if (!*bep) xhv->xhv_fill++; /* HvFILL(hv)++ */ *bep = entry; + right_length++; continue; } - else + else { oentry = &HeNEXT(entry); + left_length++; + } } if (!*aep) /* everything moved */ xhv->xhv_fill--; /* HvFILL(hv)-- */ + /* I think we don't actually need to keep track of the longest length, + merely flag if anything is too long. But for the moment while + developing this code I'll track it. */ + if (left_length > longest_chain) + longest_chain = left_length; + if (right_length > longest_chain) + longest_chain = right_length; } + + + /* Pick your policy for "hashing isn't working" here: */ + if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv) + || HvREHASH(hv)) { + return; + } + + if (hv == PL_strtab) { + /* Urg. Someone is doing something nasty to the string table. + Can't win. */ + return; + } + + /* Awooga. Awooga. Pathological data. */ + /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n", + longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ + + ++newsize; + Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + was_shared = HvSHAREKEYS(hv); + + xhv->xhv_fill = 0; + HvSHAREKEYS_off(hv); + HvREHASH_on(hv); + HvHASKFLAGS_on(hv); + + aep = (HE **) xhv->xhv_array; + + for (i=0; i<newsize; i++,aep++) { + entry = *aep; + while (entry) { + /* We're going to trash this HE's next pointer when we chain it + into the new hash below, so store where we go next. */ + HE *next = HeNEXT(entry); + UV hash; + + /* Rehash it */ + PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry)); + + if (was_shared) { + /* Unshare it. */ + HEK *new_hek + = save_hek_flags(HeKEY(entry), HeKLEN(entry), + hash, HeKFLAGS(entry)); + unshare_hek (HeKEY_hek(entry)); + HeKEY_hek(entry) = new_hek; + } else { + /* Not shared, so simply write the new hash in. */ + HeHASH(entry) = hash; + } + /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/ + HEK_REHASH_on(HeKEY_hek(entry)); + /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/ + + /* Copy oentry to the correct new chain. */ + bep = ((HE**)a) + (hash & (I32) xhv->xhv_max); + if (!*bep) + xhv->xhv_fill++; /* HvFILL(hv)++ */ + HeNEXT(entry) = *bep; + *bep = entry; + + entry = next; + } + } + Safefree (xhv->xhv_array); + xhv->xhv_array = a; /* HvARRAY(hv) = a */ } void @@ -1554,6 +1667,7 @@ Perl_newHV(pTHX) #ifndef NODEFAULT_SHAREKEYS HvSHAREKEYS_on(hv); /* key-sharing on by default */ #endif + xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */ xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */ xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */ @@ -1731,6 +1845,7 @@ Perl_hv_clear(pTHX_ HV *hv) mg_clear((SV*)hv); HvHASKFLAGS_off(hv); + HvREHASH_off(hv); } STATIC void @@ -2027,7 +2142,17 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry) sv = newSVpvn ((char*)as_utf8, utf8_len); SvUTF8_on (sv); Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */ - } else { + } else if (flags & HVhek_REHASH) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK. This hv is using custom a hasing + algorithm. Hence we can't return a shared string scalar, as + that would contain the (wrong) hash value, and might get passed + into an hv routine with a regular hash */ + + sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); + if (HEK_UTF8(hek)) + SvUTF8_on (sv); + } else { sv = newSVpvn_share(HEK_KEY(hek), (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)), HEK_HASH(hek)); @@ -2276,7 +2401,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) xhv->xhv_keys++; /* HvKEYS(hv)++ */ if (i) { /* initial entry? */ xhv->xhv_fill++; /* HvFILL(hv)++ */ - if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) + } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) { hsplit(PL_strtab); } } |