summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2003-10-16 21:10:27 +0000
committerNicholas Clark <nick@ccl4.org>2003-10-16 21:10:27 +0000
commit4b5190b5321b9b9e2ec46674b256120d4fdab72a (patch)
tree1e544d93aaa7feaa02a7944af04463c96a8acd7e
parent4e380990fe579cbec68c15cb8ee072f2d6644a71 (diff)
downloadperl-4b5190b5321b9b9e2ec46674b256120d4fdab72a.tar.gz
Plan C for foiling the algorithmic complexity attack
(based on Chip's plan A (binary compatibility with 5.8.0 and 5.8.1), Chip's plan B (do something new inside the hv functions) and introspective sort) Provides infrastructure for hashes to change their hash function if necessary, and code in hsplit to detect pathalogical data and instigate a random rehashing. Needs refinement. Let's see how much smoke it creates. p4raw-id: //depot/perl@21471
-rw-r--r--embedvar.h4
-rw-r--r--hv.c141
-rw-r--r--hv.h26
-rw-r--r--intrpvar.h4
-rw-r--r--perl.c4
-rw-r--r--perlapi.h4
-rw-r--r--sv.c1
-rw-r--r--sv.h1
-rw-r--r--util.c2
9 files changed, 174 insertions, 13 deletions
diff --git a/embedvar.h b/embedvar.h
index a4f4a929bf..f58fae9e98 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -320,6 +320,8 @@
#define PL_multi_open (vTHX->Imulti_open)
#define PL_multi_start (vTHX->Imulti_start)
#define PL_multiline (vTHX->Imultiline)
+#define PL_new_hash_seed (vTHX->Inew_hash_seed)
+#define PL_new_hash_seed_set (vTHX->Inew_hash_seed_set)
#define PL_nexttoke (vTHX->Inexttoke)
#define PL_nexttype (vTHX->Inexttype)
#define PL_nextval (vTHX->Inextval)
@@ -624,6 +626,8 @@
#define PL_Imulti_open PL_multi_open
#define PL_Imulti_start PL_multi_start
#define PL_Imultiline PL_multiline
+#define PL_Inew_hash_seed PL_new_hash_seed
+#define PL_Inew_hash_seed_set PL_new_hash_seed_set
#define PL_Inexttoke PL_nexttoke
#define PL_Inexttype PL_nexttype
#define PL_Inextval PL_nextval
diff --git a/hv.c b/hv.c
index 965b6ae638..58799d5af2 100644
--- a/hv.c
+++ b/hv.c
@@ -274,7 +274,11 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags)
}
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* entry = (HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
entry = ((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -445,7 +449,9 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
flags |= HVhek_WASUTF8 | HVhek_FREEKEY;
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
@@ -628,7 +634,12 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val,
if (flags)
HvHASKFLAGS_on((SV*)hv);
- if (!hash)
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
if (!xhv->xhv_array /* !HvARRAY(hv) */)
@@ -798,7 +809,12 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash)
HvHASKFLAGS_on((SV*)hv);
}
- if (!hash) {
+ if (HvREHASH(hv)) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK, so that hv_iterkeysv can see it. */
+ flags |= HVhek_REHASH;
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
if SvIsCOW_shared_hash(keysv) {
hash = SvUVX(keysv);
} else {
@@ -950,7 +966,11 @@ Perl_hv_delete(pTHX_ HV *hv, const char *key, I32 klen, I32 flags)
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1107,8 +1127,11 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash) {
PERL_HASH(hash, key, klen);
+ }
/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
@@ -1255,7 +1278,11 @@ Perl_hv_exists(pTHX_ HV *hv, const char *key, I32 klen)
k_flags |= HVhek_FREEKEY;
}
- PERL_HASH(hash, key, klen);
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else {
+ PERL_HASH(hash, key, klen);
+ }
#ifdef DYNAMIC_ENV_FETCH
if (!xhv->xhv_array /* !HvARRAY(hv) */) entry = Null(HE*);
@@ -1359,7 +1386,9 @@ Perl_hv_exists_ent(pTHX_ HV *hv, SV *keysv, U32 hash)
if (key != keysave)
k_flags |= HVhek_FREEKEY;
}
- if (!hash)
+ if (HvREHASH(hv)) {
+ PERL_HASH_INTERNAL(hash, key, klen);
+ } else if (!hash)
PERL_HASH(hash, key, klen);
#ifdef DYNAMIC_ENV_FETCH
@@ -1415,6 +1444,8 @@ S_hsplit(pTHX_ HV *hv)
register HE **bep;
register HE *entry;
register HE **oentry;
+ int longest_chain = 0;
+ int was_shared;
PL_nomemok = TRUE;
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
@@ -1445,6 +1476,9 @@ S_hsplit(pTHX_ HV *hv)
aep = (HE**)a;
for (i=0; i<oldsize; i++,aep++) {
+ int left_length = 0;
+ int right_length = 0;
+
if (!*aep) /* non-existent */
continue;
bep = aep+oldsize;
@@ -1455,14 +1489,90 @@ S_hsplit(pTHX_ HV *hv)
if (!*bep)
xhv->xhv_fill++; /* HvFILL(hv)++ */
*bep = entry;
+ right_length++;
continue;
}
- else
+ else {
oentry = &HeNEXT(entry);
+ left_length++;
+ }
}
if (!*aep) /* everything moved */
xhv->xhv_fill--; /* HvFILL(hv)-- */
+ /* I think we don't actually need to keep track of the longest length,
+ merely flag if anything is too long. But for the moment while
+ developing this code I'll track it. */
+ if (left_length > longest_chain)
+ longest_chain = left_length;
+ if (right_length > longest_chain)
+ longest_chain = right_length;
+ }
+
+
+ /* Pick your policy for "hashing isn't working" here: */
+ if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv)
+ || HvREHASH(hv)) {
+ return;
+ }
+
+ if (hv == PL_strtab) {
+ /* Urg. Someone is doing something nasty to the string table.
+ Can't win. */
+ return;
+ }
+
+ /* Awooga. Awooga. Pathological data. */
+ /*PerlIO_printf(PerlIO_stderr(), "Awooga %d of %d with %d/%d buckets\n",
+ longest_chain, HvTOTALKEYS(hv), HvFILL(hv), 1+HvMAX(hv));*/
+
+ ++newsize;
+ Newz(2, a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
+ was_shared = HvSHAREKEYS(hv);
+
+ xhv->xhv_fill = 0;
+ HvSHAREKEYS_off(hv);
+ HvREHASH_on(hv);
+
+ aep = (HE **) xhv->xhv_array;
+
+ for (i=0; i<newsize; i++,aep++) {
+ entry = *aep;
+ while (entry) {
+ /* We're going to trash this HE's next pointer when we chain it
+ into the new hash below, so store where we go next. */
+ HE *next = HeNEXT(entry);
+ UV hash;
+
+ /* Rehash it */
+ PERL_HASH_INTERNAL(hash, HeKEY(entry), HeKLEN(entry));
+
+ if (was_shared) {
+ /* Unshare it. */
+ HEK *new_hek
+ = save_hek_flags(HeKEY(entry), HeKLEN(entry),
+ hash, HeKFLAGS(entry));
+ unshare_hek (HeKEY_hek(entry));
+ HeKEY_hek(entry) = new_hek;
+ } else {
+ /* Not shared, so simply write the new hash in. */
+ HeHASH(entry) = hash;
+ }
+ /*PerlIO_printf(PerlIO_stderr(), "%d ", HeKFLAGS(entry));*/
+ HEK_REHASH_on(HeKEY_hek(entry));
+ /*PerlIO_printf(PerlIO_stderr(), "%d\n", HeKFLAGS(entry));*/
+
+ /* Copy oentry to the correct new chain. */
+ bep = ((HE**)a) + (hash & (I32) xhv->xhv_max);
+ if (!*bep)
+ xhv->xhv_fill++; /* HvFILL(hv)++ */
+ HeNEXT(entry) = *bep;
+ *bep = entry;
+
+ entry = next;
+ }
}
+ Safefree (xhv->xhv_array);
+ xhv->xhv_array = a; /* HvARRAY(hv) = a */
}
void
@@ -1566,6 +1676,7 @@ Perl_newHV(pTHX)
#ifndef NODEFAULT_SHAREKEYS
HvSHAREKEYS_on(hv); /* key-sharing on by default */
#endif
+
xhv->xhv_max = 7; /* HvMAX(hv) = 7 (start with 8 buckets) */
xhv->xhv_fill = 0; /* HvFILL(hv) = 0 */
xhv->xhv_pmroot = 0; /* HvPMROOT(hv) = 0 */
@@ -2039,7 +2150,17 @@ Perl_hv_iterkeysv(pTHX_ register HE *entry)
sv = newSVpvn ((char*)as_utf8, utf8_len);
SvUTF8_on (sv);
Safefree (as_utf8); /* bytes_to_utf8() allocates a new string */
- } else {
+ } else if (flags & HVhek_REHASH) {
+ /* We don't have a pointer to the hv, so we have to replicate the
+ flag into every HEK. This hv is using custom a hasing
+ algorithm. Hence we can't return a shared string scalar, as
+ that would contain the (wrong) hash value, and might get passed
+ into an hv routine with a regular hash */
+
+ sv = newSVpvn (HEK_KEY(hek), HEK_LEN(hek));
+ if (HEK_UTF8(hek))
+ SvUTF8_on (sv);
+ } else {
sv = newSVpvn_share(HEK_KEY(hek),
(HEK_UTF8(hek) ? -HEK_LEN(hek) : HEK_LEN(hek)),
HEK_HASH(hek));
diff --git a/hv.h b/hv.h
index 4fa4239be9..1c7fcda241 100644
--- a/hv.h
+++ b/hv.h
@@ -89,6 +89,24 @@ struct xpvhv {
(hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
} STMT_END
+#ifdef PERL_IN_HV_C
+#define PERL_HASH_INTERNAL(hash,str,len) \
+ STMT_START { \
+ register const char *s_PeRlHaSh_tmp = str; \
+ register const unsigned char *s_PeRlHaSh = (const unsigned char *)s_PeRlHaSh_tmp; \
+ register I32 i_PeRlHaSh = len; \
+ register U32 hash_PeRlHaSh = PL_new_hash_seed; \
+ while (i_PeRlHaSh--) { \
+ hash_PeRlHaSh += *s_PeRlHaSh++; \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 10); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 6); \
+ } \
+ hash_PeRlHaSh += (hash_PeRlHaSh << 3); \
+ hash_PeRlHaSh ^= (hash_PeRlHaSh >> 11); \
+ (hash) = (hash_PeRlHaSh + (hash_PeRlHaSh << 15)); \
+ } STMT_END
+#endif
+
/*
=head1 Hash Manipulation Functions
@@ -203,6 +221,10 @@ C<SV*>.
#define HvLAZYDEL_on(hv) (SvFLAGS(hv) |= SVphv_LAZYDEL)
#define HvLAZYDEL_off(hv) (SvFLAGS(hv) &= ~SVphv_LAZYDEL)
+#define HvREHASH(hv) (SvFLAGS(hv) & SVphv_REHASH)
+#define HvREHASH_on(hv) (SvFLAGS(hv) |= SVphv_REHASH)
+#define HvREHASH_off(hv) (SvFLAGS(hv) &= ~SVphv_REHASH)
+
/* Maybe amagical: */
/* #define HV_AMAGICmb(hv) (SvFLAGS(hv) & (SVpgv_badAM | SVpgv_AM)) */
@@ -224,6 +246,7 @@ C<SV*>.
#define HeKLEN(he) HEK_LEN(HeKEY_hek(he))
#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he))
#define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he))
+#define HeKREHASH(he) HEK_REHASH(HeKEY_hek(he))
#define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he))
#define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he))
#define HeVAL(he) (he)->hent_val
@@ -254,6 +277,7 @@ C<SV*>.
#define HVhek_UTF8 0x01 /* Key is utf8 encoded. */
#define HVhek_WASUTF8 0x02 /* Key is bytes here, but was supplied as utf8. */
+#define HVhek_REHASH 0x04 /* This key is in an hv using a custom HASH . */
#define HVhek_FREEKEY 0x100 /* Internal flag to say key is malloc()ed. */
#define HVhek_PLACEHOLD 0x200 /* Internal flag to create placeholder.
* (may change, but Storable is a core module) */
@@ -265,6 +289,8 @@ C<SV*>.
#define HEK_WASUTF8(hek) (HEK_FLAGS(hek) & HVhek_WASUTF8)
#define HEK_WASUTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_WASUTF8)
#define HEK_WASUTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_WASUTF8)
+#define HEK_REHASH(hek) (HEK_FLAGS(hek) & HVhek_REHASH)
+#define HEK_REHASH_on(hek) (HEK_FLAGS(hek) |= HVhek_REHASH)
/* calculate HV array allocation */
#if defined(STRANGE_MALLOC) || defined(MYMALLOC)
diff --git a/intrpvar.h b/intrpvar.h
index 6a34ea4e67..09709ea123 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -531,6 +531,10 @@ PERLVAR(IDBassertion, SV *)
PERLVARI(Icv_has_eval, I32, 0) /* PL_compcv includes an entereval or similar */
+PERLVARI(Inew_hash_seed, UV, 0) /* 582 hash initializer */
+
+PERLVARI(Inew_hash_seed_set, bool, FALSE) /* 582 hash initialized? */
+
/* New variables must be added to the very end, before this comment,
* for binary compatibility (the offsets of the old members must not change).
* (Don't forget to add your variable also to perl_clone()!)
diff --git a/perl.c b/perl.c
index 889b49326e..a6480a0c72 100644
--- a/perl.c
+++ b/perl.c
@@ -918,7 +918,7 @@ setuid perl scripts securely.\n");
* it is your responsibility to provide a good random seed!
* You can also define PERL_HASH_SEED in compile time, see hv.h. */
if (!PL_hash_seed_set)
- PL_hash_seed = get_hash_seed();
+ PL_new_hash_seed = get_hash_seed();
{
char *s = PerlEnv_getenv("PERL_HASH_SEED_DEBUG");
@@ -927,7 +927,7 @@ setuid perl scripts securely.\n");
if (i == 1)
PerlIO_printf(Perl_debug_log, "HASH_SEED = %"UVuf"\n",
- PL_hash_seed);
+ PL_new_hash_seed);
}
}
#endif /* #if defined(USE_HASH_SEED) || defined(USE_HASH_SEED_EXPLICIT) */
diff --git a/perlapi.h b/perlapi.h
index 639f70c10e..ac14cf9585 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -398,6 +398,10 @@ END_EXTERN_C
#define PL_multi_start (*Perl_Imulti_start_ptr(aTHX))
#undef PL_multiline
#define PL_multiline (*Perl_Imultiline_ptr(aTHX))
+#undef PL_new_hash_seed
+#define PL_new_hash_seed (*Perl_Inew_hash_seed_ptr(aTHX))
+#undef PL_new_hash_seed_set
+#define PL_new_hash_seed_set (*Perl_Inew_hash_seed_set_ptr(aTHX))
#undef PL_nexttoke
#define PL_nexttoke (*Perl_Inexttoke_ptr(aTHX))
#undef PL_nexttype
diff --git a/sv.c b/sv.c
index 4e0f43e9a5..f5eab2f52b 100644
--- a/sv.c
+++ b/sv.c
@@ -11349,6 +11349,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
PL_glob_index = proto_perl->Iglob_index;
PL_srand_called = proto_perl->Isrand_called;
PL_hash_seed = proto_perl->Ihash_seed;
+ PL_new_hash_seed = proto_perl->Inew_hash_seed;
PL_uudmap['M'] = 0; /* reinits on demand */
PL_bitcount = Nullch; /* reinits on demand */
diff --git a/sv.h b/sv.h
index 5d2fab27bc..a9a57dfdda 100644
--- a/sv.h
+++ b/sv.h
@@ -213,6 +213,7 @@ perform the upgrade if necessary. See C<svtype>.
#define SVrepl_EVAL 0x40000000 /* Replacement part of s///e */
+#define SVphv_REHASH 0x10000000 /* HV is recalculating hash values */
#define SVphv_SHAREKEYS 0x20000000 /* keys live on shared string table */
#define SVphv_LAZYDEL 0x40000000 /* entry in xhv_eiter must be deleted */
#define SVphv_HASKFLAGS 0x80000000 /* keys have flag byte after hash */
diff --git a/util.c b/util.c
index 88b07cb5bb..b196d9b4c0 100644
--- a/util.c
+++ b/util.c
@@ -4427,7 +4427,7 @@ Perl_get_hash_seed(pTHX)
Perl_croak(aTHX_ "Your random numbers are not that random");
}
}
- PL_hash_seed_set = TRUE;
+ PL_new_hash_seed_set = TRUE;
return myseed;
}