perturb insertion order and update xhv_rand during insertion and S_hsplit()

When inserting into a hash results in a collision the order of the items in the bucket chain is predictable (FILO), and can be used to determine that a collision has occured. When a hash is too small for the number of items it holds we double its size and remap the items as required. During this process the keys in a bucket will reverse order, and exposes information to an attacker that a collision has occured. We therefore use the PL_hash_rand_bits() and the S_ptr_hash() infrastructure to randomly "perturb" the order that colliding items are inserted into the bucket chain. During insertion and mapping instead of doing a simple "insert to top" we check the low bit of PL_hash_rand_bits() and depending if it is set or not we insert at the top of the chain, otherwise second from the top. The end result being that the order in a bucket is less predictable, which should make it harder for an attacker to spot a collision. Every insert (via hv_common), and bucket doubling (via hsplit()) results in us updating PL_hash_rand_bits() using "randomish" data like the hashed bucket address, the hash of the inserted item, and the address of the inserted item. This also updates the xhv_rand() of the hash, if there is one, during S_hsplit() so that the iteration order changes when S_hsplit() is called. This also is intended to make it harder for an attacker to aquire information about collisions.
author: Yves Orton <demerphq@gmail.com> 2013-03-17 20:33:19 +0100
committer: Yves Orton <demerphq@gmail.com> 2013-03-19 00:23:12 +0100
commit: 3078e109184e83a0c0e99d9eea771d67b90f1e72 (patch)
tree: 60126dd73ef86793c4a66ea40f40af2fb3a5d878 /hv.c
parent: 0e0ab62106f892a1b7f00ad117493064bf9d72d1 (diff)
download: perl-3078e109184e83a0c0e99d9eea771d67b90f1e72.tar.gz
1 files changed, 39 insertions, 5 deletions
diff --git a/hv.c b/hv.c
index 29d05477e1..3de86d1e89 100644
--- a/hv.c
+++ b/hv.c
@@ -786,8 +786,20 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
     else                                       /* gotta do the real thing */
 	HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
     HeVAL(entry) = val;
-    HeNEXT(entry) = *oentry;
-    *oentry = entry;
+
+    /* This logic semi-randomizes the insert order in a bucket.
+     * Either we insert into the top, or the slot below the top,
+     * making it harder to see if there is a collision.
+     */
+    PL_hash_rand_bits += (PTRV)entry ^ hash; /* we don't bother to use ptr_hash here */
+    PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
+    if ( !*oentry || (PL_hash_rand_bits & 1) ) {
+        HeNEXT(entry) = *oentry;
+        *oentry = entry;
+    } else {
+        HeNEXT(entry) = HeNEXT(*oentry);
+        HeNEXT(*oentry) = entry;
+    }
 
     if (val == &PL_sv_placeholder)
 	HvPLACEHOLDERS(hv)++;
@@ -1123,8 +1135,20 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
       PL_nomemok = FALSE;
       return;
     }
+    /* the idea of this is that we create a "random" value by hashing the address of
+     * the array, we then use the low bit to decide if we insert at the top, or insert
+     * second from top. After each such insert we rotate the hashed value. So we can
+     * use the same hashed value over and over, and in normal build environments use
+     * very few ops to do so. ROTL32() should produce a single machine operation. */
+    PL_hash_rand_bits += ptr_hash((PTRV)a);
+    PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1);
+
     if (SvOOK(hv)) {
-	Move(&a[oldsize * sizeof(HE*)], &a[newsize * sizeof(HE*)], 1, struct xpvhv_aux);
+        struct xpvhv_aux *const dest
+            = (struct xpvhv_aux*) &a[newsize * sizeof(HE*)];
+        Move(&a[oldsize * sizeof(HE*)], dest, 1, struct xpvhv_aux);
+        /* we reset the iterator's xhv_rand as well, so they get a totally new ordering */
+        dest->xhv_rand = (U32)PL_hash_rand_bits;
     }
 
     PL_nomemok = FALSE;
@@ -1146,8 +1170,18 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
             U32 j = (HeHASH(entry) & newsize);
 	    if (j != (U32)i) {
 		*oentry = HeNEXT(entry);
-                HeNEXT(entry) = aep[j];
-                aep[j] = entry;
+                /* if the target cell is empty insert to top, otherwise
+                 * rotate the bucket rand 1 bit, and use the new low bit
+                 * to decide if we insert at top, or next from top.
+                 * IOW, we rotate only if we are dealing with colliding
+                 * elements. */
+                if (!aep[j] || ((PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1)) & 1)) {
+                    HeNEXT(entry) = aep[j];
+                    aep[j] = entry;
+                } else {
+                    HeNEXT(entry)= HeNEXT(aep[j]);
+                    HeNEXT(aep[j])= entry;
+                }
 	    }
 	    else {
 		oentry = &HeNEXT(entry);
author	Yves Orton <demerphq@gmail.com>	2013-03-17 20:33:19 +0100
committer	Yves Orton <demerphq@gmail.com>	2013-03-19 00:23:12 +0100
commit	3078e109184e83a0c0e99d9eea771d67b90f1e72 (patch)
tree	60126dd73ef86793c4a66ea40f40af2fb3a5d878 /hv.c
parent	0e0ab62106f892a1b7f00ad117493064bf9d72d1 (diff)
download	perl-3078e109184e83a0c0e99d9eea771d67b90f1e72.tar.gz