From 4b5190b5321b9b9e2ec46674b256120d4fdab72a Mon Sep 17 00:00:00 2001 From: Nicholas Clark Date: Thu, 16 Oct 2003 21:10:27 +0000 Subject: Plan C for foiling the algorithmic complexity attack (based on Chip's plan A (binary compatibility with 5.8.0 and 5.8.1), Chip's plan B (do something new inside the hv functions) and introspective sort) Provides infrastructure for hashes to change their hash function if necessary, and code in hsplit to detect pathalogical data and instigate a random rehashing. Needs refinement. Let's see how much smoke it creates. p4raw-id: //depot/perl@21471 --- hv.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 131 insertions(+), 10 deletions(-) (limited to 'hv.c') diff --git a/hv.c b/hv.c index 965b6ae638..58799d5af2 100644 --- a/hv.c +++ b/hv.c @@ -274,7 +274,11 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags) } } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -445,7 +449,9 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash) flags |= HVhek_WASUTF8 | HVhek_FREEKEY; } - if (!hash) { + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { if SvIsCOW_shared_hash(keysv) { hash = SvUVX(keysv); } else { @@ -628,7 +634,12 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val, if (flags) HvHASKFLAGS_on((SV*)hv); - if (!hash) + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); if (!xhv->xhv_array /* !HvARRAY(hv) */) @@ -798,7 +809,12 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash) HvHASKFLAGS_on((SV*)hv); } - if (!hash) { + if (HvREHASH(hv)) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK, so that hv_iterkeysv can see it. */ + flags |= HVhek_REHASH; + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { if SvIsCOW_shared_hash(keysv) { hash = SvUVX(keysv); } else { @@ -950,7 +966,11 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1107,8 +1127,11 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) { PERL_HASH(hash, key, klen); + } /* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */ oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max]; @@ -1255,7 +1278,11 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen) k_flags |= HVhek_FREEKEY; } - PERL_HASH(hash, key, klen); + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else { + PERL_HASH(hash, key, klen); + } #ifdef DYNAMIC_ENV_FETCH if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*); @@ -1359,7 +1386,9 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash) if (key != keysave) k_flags |= HVhek_FREEKEY; } - if (!hash) + if (HvREHASH(hv)) { + PERL_HASH_INTERNAL(hash, key, klen); + } else if (!hash) PERL_HASH(hash, key, klen); #ifdef DYNAMIC_ENV_FETCH @@ -1415,6 +1444,8 @@ S_hsplit(pTHX_ HV *hv) register HE **bep; register HE *entry; register HE **oentry; + int longest_chain = 0; + int was_shared; PL_nomemok = TRUE; #if defined(STRANGE_MALLOC) || defined(MYMALLOC) @@ -1445,6 +1476,9 @@ S_hsplit(pTHX_ HV *hv) aep = (HE**)a; for (i=0; ixhv_fill++; /* HvFILL(hv)++ */ *bep = entry; + right_length++; continue; } - else + else { oentry = &HeNEXT(entry); + left_length++; + } } if (!*aep) /* everything moved */ xhv->xhv_fill--; /* HvFILL(hv)-- */ + /* I think we don't actually need to keep track of the longest length, + merely flag if anything is too long. But for the moment while + developing this code I'll track it. */ + if (left_length > longest_chain) + longest_chain = left_length; + if (right_length > longest_chain) + longest_chain = right_length; + } + + + /* Pick your policy for "hashing isn't working" here: */ + if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv) + || HvREHASH(hv)) { + return; + } + + if (hv == PL_strtab) { + /* Urg. Someone is doing something nasty to the string table. + Can't win. */ + return; + } + + /* Awooga. Awooga. Pathological data. */ + /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n", + longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/ + + ++newsize; + Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char); + was_shared = HvSHAREKEYS(hv); + + xhv->xhv_fill = 0; + HvSHAREKEYS_off(hv); + HvREHASH_on(hv); + + aep = (HE **) xhv->xhv_array; + + for (i=0; ixhv_max); + if (!*bep) + xhv->xhv_fill++; /* HvFILL(hv)++ */ + HeNEXT(entry) = *bep; + *bep = entry; + + entry = next; + } } + Safefree (xhv->xhv_array); + xhv->xhv_array = a; /* HvARRAY(hv) = a */ } void @@ -1566,6 +1676,7 @@ Perl_newHV(pTHX) #ifndef NODEFAULT_SHAREKEYS HvSHAREKEYS_on(hv); /* key-sharing on by default */ #endif + xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */ xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */ xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */ @@ -2039,7 +2150,17 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry) sv = newSVpvn ((char*)as_utf8, utf8_len); SvUTF8_on (sv); Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */ - } else { + } else if (flags & HVhek_REHASH) { + /* We don't have a pointer to the hv, so we have to replicate the + flag into every HEK. This hv is using custom a hasing + algorithm. Hence we can't return a shared string scalar, as + that would contain the (wrong) hash value, and might get passed + into an hv routine with a regular hash */ + + sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek)); + if (HEK_UTF8(hek)) + SvUTF8_on (sv); + } else { sv = newSVpvn_share(HEK_KEY(hek), (HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)), HEK_HASH(hek)); -- cgit v1.2.1