Harden hashes against hash seed discovery by randomizing hash iteration

Adds: S_ptr_hash() - A new static function in hv.c which can be used to hash a pointer or integer. PL_hash_rand_bits - A new interpreter variable used as a cheap provider of "semi-random" state for use by the hash infrastructure. xpvhv_aux.xhv_rand - Used as a mask which is xored against the xpvhv_aux.riter during iteration to randomize the order the actual buckets are visited. PL_hash_rand_bits is initialized as interpreter start from the random hash seed, and then modified by "mixing in" the result of ptr_hash() on the bucket array pointer in the hv (HvARRAY(hv)) every time hv_auxinit() allocates a new iterator structure. The net result is that every hash has its own iteration order, which should make it much more difficult to determine what the current hash seed is. This required some test to be restructured, as they tested for something that was not necessarily true, we never guaranteed that two hashes with the same keys would produce the same key order, we merely promised that using keys(), values(), or each() on the same hash, without any insertions in between, would produce the same order of visiting the key/values.
author: Yves Orton <demerphq@gmail.com> 2013-03-17 20:19:09 +0100
committer: Yves Orton <demerphq@gmail.com> 2013-03-19 00:23:11 +0100
commit: 0e0ab62106f892a1b7f00ad117493064bf9d72d1 (patch)
tree: fdd83225227f4b59da3cb0fbb7e21e702f515e56
parent: b716320d9d4e3483bbddcbf6c6977a2a6a0efa1e (diff)
download: perl-0e0ab62106f892a1b7f00ad117493064bf9d72d1.tar.gz
9 files changed, 117 insertions, 36 deletions
diff --git a/embed.fnc b/embed.fnc
index 2f5e089493..ba20cf7276 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1778,7 +1778,8 @@ sn	|void	|hv_magic_check	|NN HV *hv|NN bool *needs_copy|NN bool *needs_store
 s	|void	|unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
 sR	|HEK*	|share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
 rs	|void	|hv_notallowed	|int flags|NN const char *key|I32 klen|NN const char *msg
-sn	|struct xpvhv_aux*|hv_auxinit|NN HV *hv
+sn      |U32|ptr_hash|PTRV u
+s	|struct xpvhv_aux*|hv_auxinit|NN HV *hv
 sM	|SV*	|hv_delete_common|NULLOK HV *hv|NULLOK SV *keysv \
 		|NULLOK const char *key|STRLEN klen|int k_flags|I32 d_flags \
 		|U32 hash
diff --git a/embed.h b/embed.h
index 248ed509a1..7f9be531be 100644
--- a/embed.h
+++ b/embed.h
@@ -1376,12 +1376,13 @@
 #define clear_placeholders(a,b)	S_clear_placeholders(aTHX_ a,b)
 #define hfreeentries(a)		S_hfreeentries(aTHX_ a)
 #define hsplit(a,b,c)		S_hsplit(aTHX_ a,b,c)
-#define hv_auxinit		S_hv_auxinit
+#define hv_auxinit(a)		S_hv_auxinit(aTHX_ a)
 #define hv_delete_common(a,b,c,d,e,f,g)	S_hv_delete_common(aTHX_ a,b,c,d,e,f,g)
 #define hv_free_ent_ret(a,b)	S_hv_free_ent_ret(aTHX_ a,b)
 #define hv_magic_check		S_hv_magic_check
 #define hv_notallowed(a,b,c,d)	S_hv_notallowed(aTHX_ a,b,c,d)
 #define new_he()		S_new_he(aTHX)
+#define ptr_hash		S_ptr_hash
 #define refcounted_he_value(a)	S_refcounted_he_value(aTHX_ a)
 #define save_hek_flags		S_save_hek_flags
 #define share_hek_flags(a,b,c,d)	S_share_hek_flags(aTHX_ a,b,c,d)
diff --git a/embedvar.h b/embedvar.h
index 1731bde313..a738dd7f0f 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -154,6 +154,7 @@
 #define PL_gensym		(vTHX->Igensym)
 #define PL_globalstash		(vTHX->Iglobalstash)
 #define PL_globhook		(vTHX->Iglobhook)
+#define PL_hash_rand_bits	(vTHX->Ihash_rand_bits)
 #define PL_hintgv		(vTHX->Ihintgv)
 #define PL_hints		(vTHX->Ihints)
 #define PL_hv_fetch_ent_mh	(vTHX->Ihv_fetch_ent_mh)
diff --git a/hv.c b/hv.c
index 809bd00e3a..29d05477e1 100644
--- a/hv.c
+++ b/hv.c
@@ -1757,27 +1757,66 @@ Perl_hv_fill(pTHX_ HV const *const hv)
     return count;
 }
 
+/* hash a pointer to a U32 - Used in the hash traversal randomization
+ * and bucket order randomization code
+ *
+ * this code was derived from Sereal, which was derived from autobox.
+ */
+
+PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) {
+#if PTRSIZE == 8
+    /*
+     * This is one of Thomas Wang's hash functions for 64-bit integers from:
+     * http://www.concentric.net/~Ttwang/tech/inthash.htm
+     */
+    u = (~u) + (u << 18);
+    u = u ^ (u >> 31);
+    u = u * 21;
+    u = u ^ (u >> 11);
+    u = u + (u << 6);
+    u = u ^ (u >> 22);
+#else
+    /*
+     * This is one of Bob Jenkins' hash functions for 32-bit integers
+     * from: http://burtleburtle.net/bob/hash/integer.html
+     */
+    u = (u + 0x7ed55d16) + (u << 12);
+    u = (u ^ 0xc761c23c) ^ (u >> 19);
+    u = (u + 0x165667b1) + (u << 5);
+    u = (u + 0xd3a2646c) ^ (u << 9);
+    u = (u + 0xfd7046c5) + (u << 3);
+    u = (u ^ 0xb55a4f09) ^ (u >> 16);
+#endif
+    return (U32)u;
+}
+
+
 static struct xpvhv_aux*
-S_hv_auxinit(HV *hv) {
+S_hv_auxinit(pTHX_ HV *hv) {
     struct xpvhv_aux *iter;
     char *array;
 
     PERL_ARGS_ASSERT_HV_AUXINIT;
 
-    if (!HvARRAY(hv)) {
-	Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
-	    + sizeof(struct xpvhv_aux), char);
-    } else {
-	array = (char *) HvARRAY(hv);
-	Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
-	      + sizeof(struct xpvhv_aux), char);
+    if (!SvOOK(hv)) {
+        if (!HvARRAY(hv)) {
+            Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
+                + sizeof(struct xpvhv_aux), char);
+        } else {
+            array = (char *) HvARRAY(hv);
+            Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1)
+                  + sizeof(struct xpvhv_aux), char);
+        }
+        HvARRAY(hv) = (HE**)array;
+        SvOOK_on(hv);
+        PL_hash_rand_bits += ptr_hash((PTRV)array);
+        PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
     }
-    HvARRAY(hv) = (HE**) array;
-    SvOOK_on(hv);
     iter = HvAUX(hv);
 
     iter->xhv_riter = -1; 	/* HvRITER(hv) = -1 */
     iter->xhv_eiter = NULL;	/* HvEITER(hv) = NULL */
+    iter->xhv_rand = (U32)PL_hash_rand_bits;
     iter->xhv_name_u.xhvnameu_name = 0;
     iter->xhv_name_count = 0;
     iter->xhv_backreferences = 0;
@@ -2292,7 +2331,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags)
 		iter->xhv_riter = -1; /* HvRITER(hv) = -1 */
 		break;
 	    }
-	    entry = (HvARRAY(hv))[iter->xhv_riter];
+            entry = (HvARRAY(hv))[(iter->xhv_riter ^ iter->xhv_rand) & xhv->xhv_max];
 
 	    if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) {
 		/* If we have an entry, but it's a placeholder, don't count it.
diff --git a/hv.h b/hv.h
index dddeb027e9..f2dd8e131f 100644
--- a/hv.h
+++ b/hv.h
@@ -92,6 +92,7 @@ struct xpvhv_aux {
     I32		xhv_name_count;
     struct mro_meta *xhv_mro_meta;
     HV *	xhv_super;	/* SUPER method cache */
+    U32         xhv_rand;       /* random value for hash traversal */
 };
 
 /* hash structure: */
diff --git a/intrpvar.h b/intrpvar.h
index b5902a5a41..274e7af040 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -64,6 +64,7 @@ PERLVAR(I, markstack,	I32 *)		/* stack_sp locations we're
 PERLVAR(I, markstack_ptr, I32 *)
 PERLVAR(I, markstack_max, I32 *)
 
+PERLVARI(I, hash_rand_bits, UV, 0)      /* used to randomize hash stuff */
 PERLVAR(I, strtab,	HV *)		/* shared string table */
 
 /* Fields used by magic variables such as $@, $/ and so on */
diff --git a/proto.h b/proto.h
index 35d49db3d6..3d546a0fdf 100644
--- a/proto.h
+++ b/proto.h
@@ -5706,8 +5706,8 @@ STATIC void	S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
 #define PERL_ARGS_ASSERT_HSPLIT	\
 	assert(hv)
 
-STATIC struct xpvhv_aux*	S_hv_auxinit(HV *hv)
-			__attribute__nonnull__(1);
+STATIC struct xpvhv_aux*	S_hv_auxinit(pTHX_ HV *hv)
+			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_HV_AUXINIT	\
 	assert(hv)
 
@@ -5736,6 +5736,7 @@ STATIC HE*	S_new_he(pTHX)
 			__attribute__malloc__
 			__attribute__warn_unused_result__;
 
+STATIC U32	S_ptr_hash(PTRV u);
 STATIC SV *	S_refcounted_he_value(pTHX_ const struct refcounted_he *he)
 			__attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE	\
diff --git a/t/op/smartkve.t b/t/op/smartkve.t
index 7e5d67e7c1..abd6abfd53 100644
--- a/t/op/smartkve.t
+++ b/t/op/smartkve.t
@@ -23,12 +23,19 @@ sub j { join(":",@_) }
 # match the inserted order. So we declare one hash
 # and then make all our copies from that, which should
 # mean all the copies have the same internal structure.
+#
+# And these days, even if all that weren't true, we now
+# per-hash randomize keys/values. So, we cant expect two
+# hashes with the same internal structure to return the
+# same thing at all. All we *can* expect is that keys()
+# and values() use the same ordering.
 our %base_hash;
 
 BEGIN { # in BEGIN for "use constant ..." later
-  %base_hash= (  pi => 3.14, e => 2.72, i => -1 );
+  # values match keys here so we can easily check that keys(%hash) == values(%hash)
+  %base_hash= (  pi => 'pi', e => 'e', i => 'i' );
   $array = [ qw(pi e i) ];
-  $values = [ 3.14, 2.72, -1 ];
+  $values = [ qw(pi e i) ];
   $hash  = { %base_hash } ;
   $data = {
     hash => { %base_hash },
@@ -118,16 +125,25 @@ is(keys $obj->array     ,3, 'Scalar: keys $obj->array');
 
 # Keys -- list
 
-$h_expect = j(keys %$hash);
+$h_expect = j(sort keys %base_hash);
 $a_expect = j(keys @$array);
 
-is(j(keys $hash)                ,$h_expect, 'List: keys $hash');
-is(j(keys $data->{hash})        ,$h_expect, 'List: keys $data->{hash}');
-is(j(keys CONST_HASH)           ,$h_expect, 'List: keys CONST_HASH');
-is(j(keys CONST_HASH())         ,$h_expect, 'List: keys CONST_HASH()');
-is(j(keys hash_sub)             ,$h_expect, 'List: keys hash_sub');
-is(j(keys hash_sub())           ,$h_expect, 'List: keys hash_sub()');
-is(j(keys $obj->hash)           ,$h_expect, 'List: keys $obj->hash');
+is(j(sort keys $hash)           ,$h_expect, 'List: sort keys $hash');
+is(j(sort keys $data->{hash})   ,$h_expect, 'List: sort keys $data->{hash}');
+is(j(sort keys CONST_HASH)      ,$h_expect, 'List: sort keys CONST_HASH');
+is(j(sort keys CONST_HASH())    ,$h_expect, 'List: sort keys CONST_HASH()');
+is(j(sort keys hash_sub)        ,$h_expect, 'List: sort keys hash_sub');
+is(j(sort keys hash_sub())      ,$h_expect, 'List: sort keys hash_sub()');
+is(j(sort keys $obj->hash)      ,$h_expect, 'List: sort keys $obj->hash');
+
+is(j(keys $hash)                ,j(values $hash),           'List: keys $hash == values $hash');
+is(j(keys $data->{hash})        ,j(values $data->{hash}),   'List: keys $data->{hash} == values $data->{hash}');
+is(j(keys CONST_HASH)           ,j(values CONST_HASH),      'List: keys CONST_HASH == values CONST_HASH');
+is(j(keys CONST_HASH())         ,j(values CONST_HASH()),    'List: keys CONST_HASH() == values CONST_HASH()');
+is(j(keys hash_sub)             ,j(values hash_sub),        'List: keys hash_sub == values hash_sub');
+is(j(keys hash_sub())           ,j(values hash_sub()),      'List: keys hash_sub() == values hash_sub()');
+is(j(keys $obj->hash)           ,j(values $obj->hash),      'List: keys $obj->hash == values obj->hash');
+
 is(j(keys $array)               ,$a_expect, 'List: keys $array');
 is(j(keys $data->{array})       ,$a_expect, 'List: keys $data->{array}');
 is(j(keys CONST_ARRAY)          ,$a_expect, 'List: keys CONST_ARRAY');
@@ -221,16 +237,25 @@ is(values $obj->array     ,3, 'Scalar: values $obj->array');
 
 # Values -- list
 
-$h_expect = j(values %$hash);
+$h_expect = j(sort values %base_hash);
 $a_expect = j(values @$array);
 
-is(j(values $hash)                ,$h_expect, 'List: values $hash');
-is(j(values $data->{hash})        ,$h_expect, 'List: values $data->{hash}');
-is(j(values CONST_HASH)           ,$h_expect, 'List: values CONST_HASH');
-is(j(values CONST_HASH())         ,$h_expect, 'List: values CONST_HASH()');
-is(j(values hash_sub)             ,$h_expect, 'List: values hash_sub');
-is(j(values hash_sub())           ,$h_expect, 'List: values hash_sub()');
-is(j(values $obj->hash)           ,$h_expect, 'List: values $obj->hash');
+is(j(sort values $hash)                ,$h_expect, 'List: sort values $hash');
+is(j(sort values $data->{hash})        ,$h_expect, 'List: sort values $data->{hash}');
+is(j(sort values CONST_HASH)           ,$h_expect, 'List: sort values CONST_HASH');
+is(j(sort values CONST_HASH())         ,$h_expect, 'List: sort values CONST_HASH()');
+is(j(sort values hash_sub)             ,$h_expect, 'List: sort values hash_sub');
+is(j(sort values hash_sub())           ,$h_expect, 'List: sort values hash_sub()');
+is(j(sort values $obj->hash)           ,$h_expect, 'List: sort values $obj->hash');
+
+is(j(values $hash)                ,j(keys $hash),           'List: values $hash == keys $hash');
+is(j(values $data->{hash})        ,j(keys $data->{hash}),   'List: values $data->{hash} == keys $data->{hash}');
+is(j(values CONST_HASH)           ,j(keys CONST_HASH),      'List: values CONST_HASH == keys CONST_HASH');
+is(j(values CONST_HASH())         ,j(keys CONST_HASH()),    'List: values CONST_HASH() == keys CONST_HASH()');
+is(j(values hash_sub)             ,j(keys hash_sub),        'List: values hash_sub == keys hash_sub');
+is(j(values hash_sub())           ,j(keys hash_sub()),      'List: values hash_sub() == keys hash_sub()');
+is(j(values $obj->hash)           ,j(keys $obj->hash),      'List: values $obj->hash == keys $obj->hash');
+
 is(j(values $array)               ,$a_expect, 'List: values $array');
 is(j(values $data->{array})       ,$a_expect, 'List: values $data->{array}');
 is(j(values CONST_ARRAY)          ,$a_expect, 'List: values CONST_ARRAY');
diff --git a/util.c b/util.c
index 2c745bfd6c..5c695b8a6f 100644
--- a/util.c
+++ b/util.c
@@ -5681,12 +5681,23 @@ Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer)
     else
 #endif
     {
+        unsigned char *ptr= seed_buffer;
         (void)seedDrand01((Rand_seed_t)seed());
 
-        while (seed_buffer < end) {
-            *seed_buffer++ = (unsigned char)(Drand01() * (U8_MAX+1));
+        while (ptr < end) {
+            *ptr++ = (unsigned char)(Drand01() * (U8_MAX+1));
         }
-     }
+    }
+    {   /* initialize PL_hash_rand_bits from the hash seed.
+         * This value is highly volatile, it is updated every
+         * hash insert, and is used as part of hash bucket chain
+         * randomization and hash iterator randomization. */
+        unsigned long i;
+        PL_hash_rand_bits= 0;
+        for( i = 0; i < sizeof(UV) ; i++ ) {
+            PL_hash_rand_bits = (PL_hash_rand_bits << 8) | seed_buffer[i % PERL_HASH_SEED_BYTES];
+        }
+    }
 }
 
 #ifdef PERL_GLOBAL_STRUCT
author	Yves Orton <demerphq@gmail.com>	2013-03-17 20:19:09 +0100
committer	Yves Orton <demerphq@gmail.com>	2013-03-19 00:23:11 +0100
commit	0e0ab62106f892a1b7f00ad117493064bf9d72d1 (patch)
tree	fdd83225227f4b59da3cb0fbb7e21e702f515e56
parent	b716320d9d4e3483bbddcbf6c6977a2a6a0efa1e (diff)
download	perl-0e0ab62106f892a1b7f00ad117493064bf9d72d1.tar.gz