diff options
-rw-r--r-- | embed.fnc | 3 | ||||
-rw-r--r-- | embed.h | 3 | ||||
-rw-r--r-- | embedvar.h | 1 | ||||
-rw-r--r-- | hv.c | 61 | ||||
-rw-r--r-- | hv.h | 1 | ||||
-rw-r--r-- | intrpvar.h | 1 | ||||
-rw-r--r-- | proto.h | 5 | ||||
-rw-r--r-- | t/op/smartkve.t | 61 | ||||
-rw-r--r-- | util.c | 17 |
9 files changed, 117 insertions, 36 deletions
@@ -1778,7 +1778,8 @@ sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags rs |void |hv_notallowed |int flags|NN const char *key|I32 klen|NN const char *msg -sn |struct xpvhv_aux*|hv_auxinit|NN HV *hv +sn |U32|ptr_hash|PTRV u +s |struct xpvhv_aux*|hv_auxinit|NN HV *hv sM |SV* |hv_delete_common|NULLOK HV *hv|NULLOK SV *keysv \ |NULLOK const char *key|STRLEN klen|int k_flags|I32 d_flags \ |U32 hash @@ -1376,12 +1376,13 @@ #define clear_placeholders(a,b) S_clear_placeholders(aTHX_ a,b) #define hfreeentries(a) S_hfreeentries(aTHX_ a) #define hsplit(a,b,c) S_hsplit(aTHX_ a,b,c) -#define hv_auxinit S_hv_auxinit +#define hv_auxinit(a) S_hv_auxinit(aTHX_ a) #define hv_delete_common(a,b,c,d,e,f,g) S_hv_delete_common(aTHX_ a,b,c,d,e,f,g) #define hv_free_ent_ret(a,b) S_hv_free_ent_ret(aTHX_ a,b) #define hv_magic_check S_hv_magic_check #define hv_notallowed(a,b,c,d) S_hv_notallowed(aTHX_ a,b,c,d) #define new_he() S_new_he(aTHX) +#define ptr_hash S_ptr_hash #define refcounted_he_value(a) S_refcounted_he_value(aTHX_ a) #define save_hek_flags S_save_hek_flags #define share_hek_flags(a,b,c,d) S_share_hek_flags(aTHX_ a,b,c,d) diff --git a/embedvar.h b/embedvar.h index 1731bde313..a738dd7f0f 100644 --- a/embedvar.h +++ b/embedvar.h @@ -154,6 +154,7 @@ #define PL_gensym (vTHX->Igensym) #define PL_globalstash (vTHX->Iglobalstash) #define PL_globhook (vTHX->Iglobhook) +#define PL_hash_rand_bits (vTHX->Ihash_rand_bits) #define PL_hintgv (vTHX->Ihintgv) #define PL_hints (vTHX->Ihints) #define PL_hv_fetch_ent_mh (vTHX->Ihv_fetch_ent_mh) @@ -1757,27 +1757,66 @@ Perl_hv_fill(pTHX_ HV const *const hv) return count; } +/* hash a pointer to a U32 - Used in the hash traversal randomization + * and bucket order randomization code + * + * this code was derived from Sereal, which was derived from autobox. + */ + +PERL_STATIC_INLINE U32 S_ptr_hash(PTRV u) { +#if PTRSIZE == 8 + /* + * This is one of Thomas Wang's hash functions for 64-bit integers from: + * http://www.concentric.net/~Ttwang/tech/inthash.htm + */ + u = (~u) + (u << 18); + u = u ^ (u >> 31); + u = u * 21; + u = u ^ (u >> 11); + u = u + (u << 6); + u = u ^ (u >> 22); +#else + /* + * This is one of Bob Jenkins' hash functions for 32-bit integers + * from: http://burtleburtle.net/bob/hash/integer.html + */ + u = (u + 0x7ed55d16) + (u << 12); + u = (u ^ 0xc761c23c) ^ (u >> 19); + u = (u + 0x165667b1) + (u << 5); + u = (u + 0xd3a2646c) ^ (u << 9); + u = (u + 0xfd7046c5) + (u << 3); + u = (u ^ 0xb55a4f09) ^ (u >> 16); +#endif + return (U32)u; +} + + static struct xpvhv_aux* -S_hv_auxinit(HV *hv) { +S_hv_auxinit(pTHX_ HV *hv) { struct xpvhv_aux *iter; char *array; PERL_ARGS_ASSERT_HV_AUXINIT; - if (!HvARRAY(hv)) { - Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); - } else { - array = (char *) HvARRAY(hv); - Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) - + sizeof(struct xpvhv_aux), char); + if (!SvOOK(hv)) { + if (!HvARRAY(hv)) { + Newxz(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } else { + array = (char *) HvARRAY(hv); + Renew(array, PERL_HV_ARRAY_ALLOC_BYTES(HvMAX(hv) + 1) + + sizeof(struct xpvhv_aux), char); + } + HvARRAY(hv) = (HE**)array; + SvOOK_on(hv); + PL_hash_rand_bits += ptr_hash((PTRV)array); + PL_hash_rand_bits = ROTL_UV(PL_hash_rand_bits,1); } - HvARRAY(hv) = (HE**) array; - SvOOK_on(hv); iter = HvAUX(hv); iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ + iter->xhv_rand = (U32)PL_hash_rand_bits; iter->xhv_name_u.xhvnameu_name = 0; iter->xhv_name_count = 0; iter->xhv_backreferences = 0; @@ -2292,7 +2331,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) iter->xhv_riter = -1; /* HvRITER(hv) = -1 */ break; } - entry = (HvARRAY(hv))[iter->xhv_riter]; + entry = (HvARRAY(hv))[(iter->xhv_riter ^ iter->xhv_rand) & xhv->xhv_max]; if (!(flags & HV_ITERNEXT_WANTPLACEHOLDERS)) { /* If we have an entry, but it's a placeholder, don't count it. @@ -92,6 +92,7 @@ struct xpvhv_aux { I32 xhv_name_count; struct mro_meta *xhv_mro_meta; HV * xhv_super; /* SUPER method cache */ + U32 xhv_rand; /* random value for hash traversal */ }; /* hash structure: */ diff --git a/intrpvar.h b/intrpvar.h index b5902a5a41..274e7af040 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -64,6 +64,7 @@ PERLVAR(I, markstack, I32 *) /* stack_sp locations we're PERLVAR(I, markstack_ptr, I32 *) PERLVAR(I, markstack_max, I32 *) +PERLVARI(I, hash_rand_bits, UV, 0) /* used to randomize hash stuff */ PERLVAR(I, strtab, HV *) /* shared string table */ /* Fields used by magic variables such as $@, $/ and so on */ @@ -5706,8 +5706,8 @@ STATIC void S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize) #define PERL_ARGS_ASSERT_HSPLIT \ assert(hv) -STATIC struct xpvhv_aux* S_hv_auxinit(HV *hv) - __attribute__nonnull__(1); +STATIC struct xpvhv_aux* S_hv_auxinit(pTHX_ HV *hv) + __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_HV_AUXINIT \ assert(hv) @@ -5736,6 +5736,7 @@ STATIC HE* S_new_he(pTHX) __attribute__malloc__ __attribute__warn_unused_result__; +STATIC U32 S_ptr_hash(PTRV u); STATIC SV * S_refcounted_he_value(pTHX_ const struct refcounted_he *he) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_REFCOUNTED_HE_VALUE \ diff --git a/t/op/smartkve.t b/t/op/smartkve.t index 7e5d67e7c1..abd6abfd53 100644 --- a/t/op/smartkve.t +++ b/t/op/smartkve.t @@ -23,12 +23,19 @@ sub j { join(":",@_) } # match the inserted order. So we declare one hash # and then make all our copies from that, which should # mean all the copies have the same internal structure. +# +# And these days, even if all that weren't true, we now +# per-hash randomize keys/values. So, we cant expect two +# hashes with the same internal structure to return the +# same thing at all. All we *can* expect is that keys() +# and values() use the same ordering. our %base_hash; BEGIN { # in BEGIN for "use constant ..." later - %base_hash= ( pi => 3.14, e => 2.72, i => -1 ); + # values match keys here so we can easily check that keys(%hash) == values(%hash) + %base_hash= ( pi => 'pi', e => 'e', i => 'i' ); $array = [ qw(pi e i) ]; - $values = [ 3.14, 2.72, -1 ]; + $values = [ qw(pi e i) ]; $hash = { %base_hash } ; $data = { hash => { %base_hash }, @@ -118,16 +125,25 @@ is(keys $obj->array ,3, 'Scalar: keys $obj->array'); # Keys -- list -$h_expect = j(keys %$hash); +$h_expect = j(sort keys %base_hash); $a_expect = j(keys @$array); -is(j(keys $hash) ,$h_expect, 'List: keys $hash'); -is(j(keys $data->{hash}) ,$h_expect, 'List: keys $data->{hash}'); -is(j(keys CONST_HASH) ,$h_expect, 'List: keys CONST_HASH'); -is(j(keys CONST_HASH()) ,$h_expect, 'List: keys CONST_HASH()'); -is(j(keys hash_sub) ,$h_expect, 'List: keys hash_sub'); -is(j(keys hash_sub()) ,$h_expect, 'List: keys hash_sub()'); -is(j(keys $obj->hash) ,$h_expect, 'List: keys $obj->hash'); +is(j(sort keys $hash) ,$h_expect, 'List: sort keys $hash'); +is(j(sort keys $data->{hash}) ,$h_expect, 'List: sort keys $data->{hash}'); +is(j(sort keys CONST_HASH) ,$h_expect, 'List: sort keys CONST_HASH'); +is(j(sort keys CONST_HASH()) ,$h_expect, 'List: sort keys CONST_HASH()'); +is(j(sort keys hash_sub) ,$h_expect, 'List: sort keys hash_sub'); +is(j(sort keys hash_sub()) ,$h_expect, 'List: sort keys hash_sub()'); +is(j(sort keys $obj->hash) ,$h_expect, 'List: sort keys $obj->hash'); + +is(j(keys $hash) ,j(values $hash), 'List: keys $hash == values $hash'); +is(j(keys $data->{hash}) ,j(values $data->{hash}), 'List: keys $data->{hash} == values $data->{hash}'); +is(j(keys CONST_HASH) ,j(values CONST_HASH), 'List: keys CONST_HASH == values CONST_HASH'); +is(j(keys CONST_HASH()) ,j(values CONST_HASH()), 'List: keys CONST_HASH() == values CONST_HASH()'); +is(j(keys hash_sub) ,j(values hash_sub), 'List: keys hash_sub == values hash_sub'); +is(j(keys hash_sub()) ,j(values hash_sub()), 'List: keys hash_sub() == values hash_sub()'); +is(j(keys $obj->hash) ,j(values $obj->hash), 'List: keys $obj->hash == values obj->hash'); + is(j(keys $array) ,$a_expect, 'List: keys $array'); is(j(keys $data->{array}) ,$a_expect, 'List: keys $data->{array}'); is(j(keys CONST_ARRAY) ,$a_expect, 'List: keys CONST_ARRAY'); @@ -221,16 +237,25 @@ is(values $obj->array ,3, 'Scalar: values $obj->array'); # Values -- list -$h_expect = j(values %$hash); +$h_expect = j(sort values %base_hash); $a_expect = j(values @$array); -is(j(values $hash) ,$h_expect, 'List: values $hash'); -is(j(values $data->{hash}) ,$h_expect, 'List: values $data->{hash}'); -is(j(values CONST_HASH) ,$h_expect, 'List: values CONST_HASH'); -is(j(values CONST_HASH()) ,$h_expect, 'List: values CONST_HASH()'); -is(j(values hash_sub) ,$h_expect, 'List: values hash_sub'); -is(j(values hash_sub()) ,$h_expect, 'List: values hash_sub()'); -is(j(values $obj->hash) ,$h_expect, 'List: values $obj->hash'); +is(j(sort values $hash) ,$h_expect, 'List: sort values $hash'); +is(j(sort values $data->{hash}) ,$h_expect, 'List: sort values $data->{hash}'); +is(j(sort values CONST_HASH) ,$h_expect, 'List: sort values CONST_HASH'); +is(j(sort values CONST_HASH()) ,$h_expect, 'List: sort values CONST_HASH()'); +is(j(sort values hash_sub) ,$h_expect, 'List: sort values hash_sub'); +is(j(sort values hash_sub()) ,$h_expect, 'List: sort values hash_sub()'); +is(j(sort values $obj->hash) ,$h_expect, 'List: sort values $obj->hash'); + +is(j(values $hash) ,j(keys $hash), 'List: values $hash == keys $hash'); +is(j(values $data->{hash}) ,j(keys $data->{hash}), 'List: values $data->{hash} == keys $data->{hash}'); +is(j(values CONST_HASH) ,j(keys CONST_HASH), 'List: values CONST_HASH == keys CONST_HASH'); +is(j(values CONST_HASH()) ,j(keys CONST_HASH()), 'List: values CONST_HASH() == keys CONST_HASH()'); +is(j(values hash_sub) ,j(keys hash_sub), 'List: values hash_sub == keys hash_sub'); +is(j(values hash_sub()) ,j(keys hash_sub()), 'List: values hash_sub() == keys hash_sub()'); +is(j(values $obj->hash) ,j(keys $obj->hash), 'List: values $obj->hash == keys $obj->hash'); + is(j(values $array) ,$a_expect, 'List: values $array'); is(j(values $data->{array}) ,$a_expect, 'List: values $data->{array}'); is(j(values CONST_ARRAY) ,$a_expect, 'List: values CONST_ARRAY'); @@ -5681,12 +5681,23 @@ Perl_get_hash_seed(pTHX_ unsigned char *seed_buffer) else #endif { + unsigned char *ptr= seed_buffer; (void)seedDrand01((Rand_seed_t)seed()); - while (seed_buffer < end) { - *seed_buffer++ = (unsigned char)(Drand01() * (U8_MAX+1)); + while (ptr < end) { + *ptr++ = (unsigned char)(Drand01() * (U8_MAX+1)); } - } + } + { /* initialize PL_hash_rand_bits from the hash seed. + * This value is highly volatile, it is updated every + * hash insert, and is used as part of hash bucket chain + * randomization and hash iterator randomization. */ + unsigned long i; + PL_hash_rand_bits= 0; + for( i = 0; i < sizeof(UV) ; i++ ) { + PL_hash_rand_bits = (PL_hash_rand_bits << 8) | seed_buffer[i % PERL_HASH_SEED_BYTES]; + } + } } #ifdef PERL_GLOBAL_STRUCT |