summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2017-04-23 11:58:24 +0200
committerYves Orton <demerphq@gmail.com>2017-04-23 11:58:24 +0200
commite4343ef32499562ce956ba3cb9cf4454d5d2ff7f (patch)
treee56359af9d8b657baf6ee9971f0a47d5eb069e07
parentca7eb79a236b41b7722c6800527f95cd76843eed (diff)
downloadperl-e4343ef32499562ce956ba3cb9cf4454d5d2ff7f.tar.gz
Revert "Tweak our hash bucket splitting rules"
This reverts commit 05f97de032fe95cabe8c9f6d6c0a5897b1616194. Accidentally pushed while waiting for blead-unfreeze.
-rw-r--r--ext/Hash-Util/t/Util.t4
-rw-r--r--ext/Hash-Util/t/builtin.t10
-rw-r--r--hv.c43
-rw-r--r--t/op/coreamp.t2
-rw-r--r--t/op/hash.t21
-rw-r--r--t/op/sub_lval.t2
6 files changed, 25 insertions, 57 deletions
diff --git a/ext/Hash-Util/t/Util.t b/ext/Hash-Util/t/Util.t
index c52a8e4b88..4a12fd1764 100644
--- a/ext/Hash-Util/t/Util.t
+++ b/ext/Hash-Util/t/Util.t
@@ -606,9 +606,9 @@ ok(defined($hash_seed) && $hash_seed ne '', "hash_seed $hash_seed");
my $array1= bucket_array({});
my $array2= bucket_array({1..10});
is("@info1","0 8 0");
- like("@info2[0,1]",qr/5 (?:8|16)/);
+ is("@info2[0,1]","5 8");
is("@stats1","0 8 0");
- like("@stats2[0,1]",qr/5 (?:8|16)/);
+ is("@stats2[0,1]","5 8");
my @keys1= sort map { ref $_ ? @$_ : () } @$array1;
my @keys2= sort map { ref $_ ? @$_ : () } @$array2;
is("@keys1","");
diff --git a/ext/Hash-Util/t/builtin.t b/ext/Hash-Util/t/builtin.t
index 0705f84206..3654c9bc1a 100644
--- a/ext/Hash-Util/t/builtin.t
+++ b/ext/Hash-Util/t/builtin.t
@@ -26,15 +26,13 @@ is(used_buckets(%hash), 1, "hash should have one used buckets");
$hash{$_}= $_ for 2..7;
-like(bucket_ratio(%hash), qr!/(?:8|16)!, "hash has expected number of buckets in bucket_ratio");
-my $num= num_buckets(%hash);
-ok(($num == 8 || $num == 16), "hash should have 8 or 16 buckets");
+like(bucket_ratio(%hash), qr!/8!, "hash has expected number of buckets in bucket_ratio");
+is(num_buckets(%hash), 8, "hash should have eight buckets");
cmp_ok(used_buckets(%hash), "<", 8, "hash should have one used buckets");
$hash{8}= 8;
-like(bucket_ratio(%hash), qr!/(?:8|16)!, "hash has expected number of buckets in bucket_ratio");
-$num= num_buckets(%hash);
-ok(($num == 8 || $num == 16), "hash should have 8 or 16 buckets");
+like(bucket_ratio(%hash), qr!/16!, "hash has expected number of buckets in bucket_ratio");
+is(num_buckets(%hash), 16, "hash should have sixteen buckets");
cmp_ok(used_buckets(%hash), "<=", 8, "hash should have at most 8 used buckets");
diff --git a/hv.c b/hv.c
index 3bd62c6f9d..85e42d13e0 100644
--- a/hv.c
+++ b/hv.c
@@ -34,11 +34,7 @@ holds the key and hash value.
#define PERL_HASH_INTERNAL_ACCESS
#include "perl.h"
-/* we split when we collide and we have a load factor over 0.667.
- * NOTE if you change this formula so we split earlier than previously
- * you MUST change the logic in hv_ksplit()
- */
-#define DO_HSPLIT(xhv) ( ((xhv)->xhv_keys + ((xhv)->xhv_keys >> 1)) > (xhv)->xhv_max )
+#define DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */
#define HV_FILL_THRESHOLD 31
static const char S_strtab_error[]
@@ -347,7 +343,6 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HE **oentry;
SV *sv;
bool is_utf8;
- bool in_collision;
int masked_flags;
const int return_svp = action & HV_FETCH_JUST_SV;
HEK *keysv_hek = NULL;
@@ -840,7 +835,6 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
* making it harder to see if there is a collision. We also
* reset the iterator randomizer if there is one.
*/
- in_collision = *oentry != NULL;
if ( *oentry && PL_HASH_RAND_BITS_ENABLED) {
PL_hash_rand_bits++;
PL_hash_rand_bits= ROTL_UV(PL_hash_rand_bits,1);
@@ -883,7 +877,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
HvHASKFLAGS_on(hv);
xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
- if ( in_collision && DO_HSPLIT(xhv) ) {
+ if ( DO_HSPLIT(xhv) ) {
const STRLEN oldsize = xhv->xhv_max + 1;
const U32 items = (U32)HvPLACEHOLDERS_get(hv);
@@ -1456,42 +1450,29 @@ void
Perl_hv_ksplit(pTHX_ HV *hv, IV newmax)
{
XPVHV* xhv = (XPVHV*)SvANY(hv);
- const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 */
+ const I32 oldsize = (I32) xhv->xhv_max+1; /* HvMAX(hv)+1 (sick) */
I32 newsize;
- I32 wantsize;
- I32 trysize;
char *a;
PERL_ARGS_ASSERT_HV_KSPLIT;
- wantsize = (I32) newmax; /* possible truncation here */
- if (wantsize != newmax)
+ newsize = (I32) newmax; /* possible truncation here */
+ if (newsize != newmax || newmax <= oldsize)
return;
-
- wantsize= wantsize + (wantsize >> 1); /* wantsize *= 1.5 */
- if (wantsize < newmax) /* overflow detection */
- return;
-
- newsize = oldsize;
- while (wantsize > newsize) {
- trysize = newsize << 1;
- if (trysize > newsize) {
- newsize = trysize;
- } else {
- /* we overflowed */
- return;
- }
+ while ((newsize & (1 + ~newsize)) != newsize) {
+ newsize &= ~(newsize & (1 + ~newsize)); /* get proper power of 2 */
}
-
- if (newsize <= oldsize)
- return; /* overflow detection */
+ if (newsize < newmax)
+ newsize *= 2;
+ if (newsize < newmax)
+ return; /* overflow detection */
a = (char *) HvARRAY(hv);
if (a) {
hsplit(hv, oldsize, newsize);
} else {
Newxz(a, PERL_HV_ARRAY_ALLOC_BYTES(newsize), char);
- xhv->xhv_max = newsize - 1;
+ xhv->xhv_max = --newsize;
HvARRAY(hv) = (HE **) a;
}
}
diff --git a/t/op/coreamp.t b/t/op/coreamp.t
index 277ac1094a..4b68569c87 100644
--- a/t/op/coreamp.t
+++ b/t/op/coreamp.t
@@ -639,7 +639,7 @@ SKIP: {
my %h = 1..2;
&mykeys(\%h) = 1024;
- like Hash::Util::bucket_ratio(%h), qr!/(?:1024|2048)\z!, '&mykeys = changed number of buckets allocated';
+ like Hash::Util::bucket_ratio(%h), qr|/1024\z|, '&mykeys = changed number of buckets allocated';
eval { (&mykeys(\%h)) = 1025; };
like $@, qr/^Can't modify keys in list assignment at /;
}
diff --git a/t/op/hash.t b/t/op/hash.t
index 0551e03ca2..a0e79c7396 100644
--- a/t/op/hash.t
+++ b/t/op/hash.t
@@ -163,8 +163,7 @@ sub torture_hash {
my ($h2, $h3, $h4);
while (keys %$h > 2) {
my $take = (keys %$h) / 2 - 1;
- my @keys = (sort keys %$h)[0..$take];
-
+ my @keys = (keys %$h)[0 .. $take];
my $scalar = %$h;
delete @$h{@keys};
push @groups, $scalar, \@keys;
@@ -179,19 +178,9 @@ sub torture_hash {
# Each time this will get emptied then repopulated. If the fill isn't reset
# when the hash is emptied, the used count will likely exceed the array
- use Devel::Peek;
%$h3 = %$h2;
- is(join(",", sort keys %$h3),join(",",sort keys %$h2),"$desc (+$count copy) has same keys");
my (undef, $total3) = validate_hash("$desc (+$count copy)", $h3);
- # We now only split when we collide on insert AND exceed the load factor
- # when we did so. Building a hash via %x=%y means a pseudo-random key
- # order inserting into %x, and we may end up encountering a collision
- # at a different point in the load order, resulting in a possible power of
- # two difference under the current load factor expectations. If this test
- # fails then it is probably because DO_HSPLIT was changed, and this test
- # needs to be adjusted accordingly.
- ok( $total2 == $total3 || $total2*2==$total3 || $total2==$total3*2,
- "$desc (+$count copy) array size within a power of 2 of each other");
+ is($total3, $total2, "$desc (+$count copy) has same array size");
# This might use fewer buckets than the original
%$h4 = %$h;
@@ -200,7 +189,7 @@ sub torture_hash {
}
my $scalar = %$h;
- my @keys = sort keys %$h;
+ my @keys = keys %$h;
delete @$h{@keys};
is(scalar %$h, 0, "scalar keys for empty $desc");
@@ -216,11 +205,11 @@ sub torture_hash {
while (@groups) {
my $keys = pop @groups;
++$h->{$_} foreach @$keys;
- my (undef, $total) = validate_hash($desc, $h);
+ my (undef, $total) = validate_hash("$desc " . keys %$h, $h);
is($total, $total0, "bucket count is constant when rebuilding");
is(scalar %$h, pop @groups, "scalar keys is identical when rebuilding");
++$h1->{$_} foreach @$keys;
- validate_hash("$desc copy", $h1);
+ validate_hash("$desc copy " . keys %$h1, $h1);
}
# This will fail if the fill count isn't handled correctly on hash split
is(scalar %$h1, scalar %$h, "scalar keys is identical on copy and original");
diff --git a/t/op/sub_lval.t b/t/op/sub_lval.t
index 099bb649fd..bf1b49cbc1 100644
--- a/t/op/sub_lval.t
+++ b/t/op/sub_lval.t
@@ -557,7 +557,7 @@ SKIP: {
sub keeze : lvalue { keys %__ }
%__ = ("a","b");
keeze = 64;
- like Hash::Util::bucket_ratio(%__), qr!1/(?:64|128)!, 'keys assignment through lvalue sub';
+ is Hash::Util::bucket_ratio(%__), '1/64', 'keys assignment through lvalue sub';
eval { (keeze) = 64 };
like $@, qr/^Can't modify keys in list assignment at /,
'list assignment to keys through lv sub is forbidden';