Change scalar(%hash) to be the same as 0+keys(%hash)

This subject has a long history see [perl #114576] for more discussion. https://rt.perl.org/Public/Bug/Display.html?id=114576 There are a variety of reasons we want to change the return signature of scalar(%hash). One is that it leaks implementation details about our associative array structure. Another is that it requires us to keep track of the used buckets in the hash, which we use for no other purpose but for scalar(%hash). Another is that it is just odd. Almost nothing needs to know these values. Perhaps debugging, but we have several much better functions for introspecting the internals of a hash. By changing the return signature we can remove all the logic related to maintaining and updating xhv_fill_lazy. This should make hot code paths a little faster, and maybe save some memory for traversed hashes. In order to provide some form of backwards compatibility we adds three new functions to the Hash::Util namespace: bucket_ratio(), num_buckets() and used_buckets(). These functions are actually implemented in universal.c, and thus always available even if Hash::Util is not loaded. This simplifies testing. At the same time Hash::Util contains backwards compatible code so that the new functions are available from it should they be needed in older perls. There are many tests in t/op/hash.t that are more or less obsolete after this patch as they test that xhv_fill_lazy is correctly set in various situations. However since we have a backwards compat layer we can just switch them to use bucket_ratio(%hash) instead of scalar(%hash) and keep the tests, just in case they are actually testing something not tested elsewhere.
author: Yves Orton <demerphq@gmail.com> 2016-06-20 22:51:38 +0200
committer: Yves Orton <demerphq@gmail.com> 2016-06-22 18:21:32 +0200
commit: 8bf4c4010cc474d4000c2a8c78f6890fa5f1e577 (patch)
tree: b12d25aa70138f2dbc13bf1bb49a93fab7e7a4b7
parent: 6c50b67b99a3df9486896d14dc294825a148a673 (diff)
download: perl-8bf4c4010cc474d4000c2a8c78f6890fa5f1e577.tar.gz
22 files changed, 325 insertions, 128 deletions
diff --git a/MANIFEST b/MANIFEST
index 25252dfa89..abafd3b1ad 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3858,6 +3858,7 @@ ext/Hash-Util-FieldHash/t/11_hashassign.t	Adapted from t/op/hashassign.t
 ext/Hash-Util-FieldHash/t/12_hashwarn.t		Adapted from t/op/hashwarn.t
 ext/Hash-Util/lib/Hash/Util.pm	Hash::Util
 ext/Hash-Util/Makefile.PL	Makefile for Hash::Util
+ext/Hash-Util/t/builtin.t	See if Hash::Util builtin exports work as expected
 ext/Hash-Util/t/Util.t		See if Hash::Util works
 ext/Hash-Util/Util.xs		XS bits of Hash::Util
 ext/I18N-Langinfo/Langinfo.pm	I18N::Langinfo
diff --git a/dump.c b/dump.c
index 8e11546029..c168162791 100644
--- a/dump.c
+++ b/dump.c
@@ -1761,15 +1761,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
                 } while (++ents <= last);
             }
 
-            if (SvOOK(sv)) {
-                struct xpvhv_aux *const aux = HvAUX(sv);
-                Perl_dump_indent(aTHX_ level, file, "  FILL = %"UVuf
-                                 " (cached = %"UVuf")\n",
-                                 (UV)count, (UV)aux->xhv_fill_lazy);
-            } else {
-                Perl_dump_indent(aTHX_ level, file, "  FILL = %"UVuf"\n",
-                                 (UV)count);
-            }
+            Perl_dump_indent(aTHX_ level, file, "  FILL = %"UVuf"\n",
+                             (UV)count);
         }
 	Perl_dump_indent(aTHX_ level, file, "  MAX = %"IVdf"\n", (IV)HvMAX(sv));
         if (SvOOK(sv)) {
diff --git a/embed.fnc b/embed.fnc
index e4bfdee853..d00f41b04e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2774,6 +2774,7 @@ Apod	|void	|hv_assert	|NN HV *hv
 #endif
 
 ApdR	|SV*	|hv_scalar	|NN HV *hv
+ApdRMD	|SV*	|hv_bucket_ratio|NN HV *hv
 ApoR	|I32*	|hv_riter_p	|NN HV *hv
 ApoR	|HE**	|hv_eiter_p	|NN HV *hv
 Apo	|void	|hv_riter_set	|NN HV *hv|I32 riter
diff --git a/embed.h b/embed.h
index f32f31733d..84f647e53c 100644
--- a/embed.h
+++ b/embed.h
@@ -217,6 +217,7 @@
 #define gv_stashpv(a,b)		Perl_gv_stashpv(aTHX_ a,b)
 #define gv_stashpvn(a,b,c)	Perl_gv_stashpvn(aTHX_ a,b,c)
 #define gv_stashsv(a,b)		Perl_gv_stashsv(aTHX_ a,b)
+#define hv_bucket_ratio(a)	Perl_hv_bucket_ratio(aTHX_ a)
 #define hv_clear(a)		Perl_hv_clear(aTHX_ a)
 #define hv_clear_placeholders(a)	Perl_hv_clear_placeholders(aTHX_ a)
 #define hv_common(a,b,c,d,e,f,g,h)	Perl_hv_common(aTHX_ a,b,c,d,e,f,g,h)
diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t
index 56522af1e8..41898fea85 100644
--- a/ext/Devel-Peek/t/Peek.t
+++ b/ext/Devel-Peek/t/Peek.t
@@ -783,7 +783,7 @@ do_test('ENAME on a stash',
     AUX_FLAGS = 0                               # $] > 5.019008
     ARRAY = $ADDR
     KEYS = 0
-    FILL = 0 \(cached = 0\)
+    FILL = 0
     MAX = 7
     RITER = -1
     EITER = 0x0
@@ -806,7 +806,7 @@ do_test('ENAMEs on a stash',
     AUX_FLAGS = 0                               # $] > 5.019008
     ARRAY = $ADDR
     KEYS = 0
-    FILL = 0 \(cached = 0\)
+    FILL = 0
     MAX = 7
     RITER = -1
     EITER = 0x0
@@ -832,7 +832,7 @@ do_test('ENAMEs on a stash with no NAME',
     AUX_FLAGS = 0                               # $] > 5.019008
     ARRAY = $ADDR
     KEYS = 0
-    FILL = 0 \(cached = 0\)
+    FILL = 0
     MAX = 7
     RITER = -1
     EITER = 0x0
@@ -882,7 +882,7 @@ do_test('small hash after keys',
     ARRAY = $ADDR  \\(0:[67],.*\\)
     hash quality = [0-9.]+%
     KEYS = 2
-    FILL = [12] \\(cached = 0\\)
+    FILL = [12]
     MAX = 7
     RITER = -1
     EITER = 0x0
@@ -912,7 +912,7 @@ do_test('small hash after keys and scalar',
     ARRAY = $ADDR  \\(0:[67],.*\\)
     hash quality = [0-9.]+%
     KEYS = 2
-    FILL = ([12]) \\(cached = \1\\)
+    FILL = ([12])
     MAX = 7
     RITER = -1
     EITER = 0x0
@@ -927,30 +927,6 @@ do_test('small hash after keys and scalar',
       COW_REFCNT = 1
 ){2}');
 
-# This should immediately start with the FILL cached correctly.
-my %large = (0..1999);
-$b = %large;
-do_test('large hash',
-        \%large,
-'SV = $RV\\($ADDR\\) at $ADDR
-  REFCNT = 1
-  FLAGS = \\(ROK\\)
-  RV = $ADDR
-  SV = PVHV\\($ADDR\\) at $ADDR
-    REFCNT = 2
-    FLAGS = \\($PADMY,OOK,SHAREKEYS\\)
-    AUX_FLAGS = 0                               # $] > 5.019008
-    ARRAY = $ADDR  \\(0:\d+,.*\\)
-    hash quality = \d+\\.\d+%
-    KEYS = 1000
-    FILL = (\d+) \\(cached = \1\\)
-    MAX = 1023
-    RITER = -1
-    EITER = 0x0
-    RAND = $ADDR
-    Elt .*
-');
-
 # Dump with arrays, hashes, and operator return values
 @array = 1..3;
 do_test('Dump @array', '@array', <<'ARRAY', '', '', 1);
diff --git a/ext/Hash-Util-FieldHash/t/05_perlhook.t b/ext/Hash-Util-FieldHash/t/05_perlhook.t
index 61d02ec646..ab3d74ba57 100644
--- a/ext/Hash-Util-FieldHash/t/05_perlhook.t
+++ b/ext/Hash-Util-FieldHash/t/05_perlhook.t
@@ -103,9 +103,9 @@ sub numbers_first { # Sort helper: All digit entries sort in front of others
     is( $counter, 1, "list each doesn't trigger");
     is( "@x", "abc 123", "the return is correct");
 
-    $x = %h;
+    $x = scalar %h;
     is( $counter, 1, "hash in scalar context doesn't trigger");
-    like( $x, qr!^\d+/\d+$!, "correct result");
+    is( $x, 1, "correct result");
 
     (@x) = %h;
     is( $counter, 1, "hash in list context doesn't trigger");
diff --git a/ext/Hash-Util/Changes b/ext/Hash-Util/Changes
index ddef72cea6..beb3f7eb40 100644
--- a/ext/Hash-Util/Changes
+++ b/ext/Hash-Util/Changes
@@ -1,5 +1,10 @@
 Revision history for Perl extension Hash::Util.
 
+0.20
+    Add bucket_ratio, num_buckets, used_buckets as a back-compat
+    shin for 5.25 where we remove the bucket data from scalar(%hash)
+    by making it return the count of keys by default.
+
 0.17
     Add bucket_stats_formatted() as utility method to Hash::Util
     Bug fixes to hash_stats()
diff --git a/ext/Hash-Util/Util.xs b/ext/Hash-Util/Util.xs
index 9481dc7997..01f52bfe44 100644
--- a/ext/Hash-Util/Util.xs
+++ b/ext/Hash-Util/Util.xs
@@ -263,3 +263,53 @@ bucket_array(rhv)
     }
     XSRETURN(0);
 }
+
+#if PERL_VERSION < 25
+SV*
+bucket_ratio(rhv)
+        SV* rhv
+    PROTOTYPE: \%
+    PPCODE:
+{
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            SV *ret= Perl_hv_scalar(aTHX_ (HV*)rhv);
+            ST(0)= ret;
+            XSRETURN(1);
+        }
+    }
+    XSRETURN_UNDEF;
+}
+
+SV*
+num_buckets(rhv)
+        SV* rhv
+    PROTOTYPE: \%
+    PPCODE:
+{
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            XSRETURN_UV(HvMAX((HV*)rhv)+1);
+        }
+    }
+    XSRETURN_UNDEF;
+}
+
+SV*
+used_buckets(rhv)
+        SV* rhv
+    PROTOTYPE: \%
+    PPCODE:
+{
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            XSRETURN_UV(HvFILL((HV*)rhv));
+        }
+    }
+    XSRETURN_UNDEF;
+}
+
+#endif
diff --git a/ext/Hash-Util/lib/Hash/Util.pm b/ext/Hash-Util/lib/Hash/Util.pm
index a947b9a76e..ff6b3b8d55 100644
--- a/ext/Hash-Util/lib/Hash/Util.pm
+++ b/ext/Hash-Util/lib/Hash/Util.pm
@@ -34,8 +34,12 @@ our @EXPORT_OK  = qw(
                      lock_hashref_recurse unlock_hashref_recurse
 
                      hash_traversal_mask
+
+                     bucket_ratio
+                     used_buckets
+                     num_buckets
                     );
-our $VERSION = '0.19';
+our $VERSION = '0.20';
 require XSLoader;
 XSLoader::load();
 
@@ -727,6 +731,29 @@ order. B<Note> that this does B<not> guarantee that B<two> hashes will produce
 the same key order for the same hash seed and traversal mask, items that
 collide into one bucket may have different orders regardless of this setting.
 
+=item B<bucket_ratio>
+
+This function behaves the same way that scalar(%hash) behaved prior to
+Perl 5.25. Specifically if the hash is tied, then it calls the SCALAR tied
+hash method, if untied then if the hash is empty it return 0, otherwise it
+returns a string containing the number of used buckets in the hash,
+followed by a slash, followed by the total number of buckets in the hash.
+
+    my %hash=("foo"=>1);
+    print Hash::Util::bucket_ratio(%hash); # prints "1/8"
+
+=item B<used_buckets>
+
+This function returns the count of used buckets in the hash. It is expensive
+to calculate and the value is NOT cached, so avoid use of this function
+in production code.
+
+=item B<num_buckets>
+
+This function returns the total number of buckets the hash holds, or would
+hold if the array were created. (When a hash is freshly created the array
+may not be allocated even though this value will be non-zero.)
+
 =back
 
 =head2 Operating on references to hashes.
diff --git a/ext/Hash-Util/t/builtin.t b/ext/Hash-Util/t/builtin.t
new file mode 100644
index 0000000000..3654c9bc1a
--- /dev/null
+++ b/ext/Hash-Util/t/builtin.t
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -Tw
+
+use strict;
+use Test::More;
+
+my @Exported_Funcs;
+BEGIN {
+    @Exported_Funcs = qw( bucket_ratio num_buckets used_buckets );
+    plan tests => 13 + @Exported_Funcs;
+    use_ok 'Hash::Util', @Exported_Funcs;
+}
+foreach my $func (@Exported_Funcs) {
+    can_ok __PACKAGE__, $func;
+}
+
+my %hash;
+
+is(bucket_ratio(%hash), 0, "Empty hash has no bucket_ratio");
+is(num_buckets(%hash), 8, "Empty hash should have eight buckets");
+is(used_buckets(%hash), 0, "Empty hash should have no used buckets");
+
+$hash{1}= 1;
+is(bucket_ratio(%hash), "1/8", "hash has expected bucket_ratio");
+is(num_buckets(%hash), 8, "hash should have eight buckets");
+is(used_buckets(%hash), 1, "hash should have one used buckets");
+
+$hash{$_}= $_ for 2..7;
+
+like(bucket_ratio(%hash), qr!/8!, "hash has expected number of buckets in bucket_ratio");
+is(num_buckets(%hash), 8, "hash should have eight buckets");
+cmp_ok(used_buckets(%hash), "<", 8, "hash should have one used buckets");
+
+$hash{8}= 8;
+like(bucket_ratio(%hash), qr!/16!, "hash has expected number of buckets in bucket_ratio");
+is(num_buckets(%hash), 16, "hash should have sixteen buckets");
+cmp_ok(used_buckets(%hash), "<=", 8, "hash should have at most 8 used buckets");
+
+
diff --git a/hv.c b/hv.c
index 55234753ed..3b2523ba38 100644
--- a/hv.c
+++ b/hv.c
@@ -829,13 +829,6 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 	HeKEY_hek(entry) = save_hek_flags(key, klen, hash, flags);
     HeVAL(entry) = val;
 
-    if (!*oentry && SvOOK(hv)) {
-        /* initial entry, and aux struct present.  */
-        struct xpvhv_aux *const aux = HvAUX(hv);
-        if (aux->xhv_fill_lazy)
-            ++aux->xhv_fill_lazy;
-    }
-
 #ifdef PERL_HASH_RANDOMIZE_KEYS
     /* This logic semi-randomizes the insert order in a bucket.
      * Either we insert into the top, or the slot below the top,
@@ -937,8 +930,14 @@ S_hv_magic_check(HV *hv, bool *needs_copy, bool *needs_store)
 /*
 =for apidoc hv_scalar
 
-Evaluates the hash in scalar context and returns the result.  Handles magic
-when the hash is tied.
+Evaluates the hash in scalar context and returns the result.
+
+When the hash is tied dispatches through to the SCALAR method,
+otherwise returns a mortal SV containing the number of keys
+in the hash.
+
+Note, prior to 5.25 this function returned what is now
+returned by the hv_bucket_ratio() function.
 
 =cut
 */
@@ -957,7 +956,41 @@ Perl_hv_scalar(pTHX_ HV *hv)
     }
 
     sv = sv_newmortal();
-    if (HvTOTALKEYS((const HV *)hv)) 
+    sv_setuv(sv, HvUSEDKEYS(hv));
+
+    return sv;
+}
+
+/*
+=for apidoc Perl_hv_bucket_ratio
+
+If the hash is tied dispatches through to the SCALAR tied method,
+otherwise if the hash contains no keys returns 0, otherwise returns
+a mortal sv containing a string specifying the number of used buckets,
+followed by a slash, followed by the number of available buckets.
+
+This function is expensive, it must scan all of the buckets
+to determine which are used, and the count is NOT cached.
+In a large hash this could be a lot of buckets.
+
+=cut
+*/
+
+SV *
+Perl_hv_bucket_ratio(pTHX_ HV *hv)
+{
+    SV *sv;
+
+    PERL_ARGS_ASSERT_HV_BUCKET_RATIO;
+
+    if (SvRMAGICAL(hv)) {
+        MAGIC * const mg = mg_find((const SV *)hv, PERL_MAGIC_tied);
+        if (mg)
+            return magic_scalarpack(hv, mg);
+    }
+
+    sv = sv_newmortal();
+    if (HvUSEDKEYS((const HV *)hv))
         Perl_sv_setpvf(aTHX_ sv, "%ld/%ld",
                 (long)HvFILL(hv), (long)HvMAX(hv) + 1);
     else
@@ -1256,12 +1289,6 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 	    HvPLACEHOLDERS(hv)++;
 	else {
 	    *oentry = HeNEXT(entry);
-            if(!*first_entry && SvOOK(hv)) {
-                /* removed last entry, and aux struct present.  */
-                struct xpvhv_aux *const aux = HvAUX(hv);
-                if (aux->xhv_fill_lazy)
-                    --aux->xhv_fill_lazy;
-            }
 	    if (SvOOK(hv) && entry == HvAUX(hv)->xhv_eiter /* HvEITER(hv) */)
 		HvLAZYDEL_on(hv);
 	    else {
@@ -1353,10 +1380,6 @@ S_hsplit(pTHX_ HV *hv, STRLEN const oldsize, STRLEN newsize)
 #ifdef PERL_HASH_RANDOMIZE_KEYS
             dest->xhv_rand = (U32)PL_hash_rand_bits;
 #endif
-            /* For now, just reset the lazy fill counter.
-               It would be possible to update the counter in the code below
-               instead.  */
-            dest->xhv_fill_lazy = 0;
         } else {
             /* no existing aux structure, but we allocated space for one
              * so initialize it properly. This unrolls hv_auxinit() a bit,
@@ -1852,12 +1875,6 @@ Perl_hfree_next_entry(pTHX_ HV *hv, STRLEN *indexp)
             iter->xhv_last_rand = iter->xhv_rand;
 #endif
         }
-        /* Reset any cached HvFILL() to "unknown".  It's unlikely that anyone
-           will actually call HvFILL() on a hash under destruction, so it
-           seems pointless attempting to track the number of keys remaining.
-           But if they do, we want to reset it again.  */
-        if (iter->xhv_fill_lazy)
-            iter->xhv_fill_lazy = 0;
     }
 
     if (!((XPVHV*)SvANY(hv))->xhv_keys)
@@ -2002,17 +2019,15 @@ Perl_hv_undef_flags(pTHX_ HV *hv, U32 flags)
 /*
 =for apidoc hv_fill
 
-Returns the number of hash buckets that
-happen to be in use.  This function is
-wrapped by the macro C<HvFILL>.
+Returns the number of hash buckets that happen to be in use.
+
+This function is wrapped by the macro C<HvFILL>.
 
-Previously this value was always stored in the HV structure, which created an
-overhead on every hash (and pretty much every object) for something that was
-rarely used.  Now we calculate it on demand the first
-time that it is needed, and cache it if that calculation
-is going to be costly to repeat.  The cached
-value is updated by insertions and deletions, but (currently) discarded if
-the hash is split.
+As of perl 5.25 this function is used only for debugging
+purposes, and the number of used hash buckets is not
+in any way cached, thus this function can be costly
+to execute as it must iterate over all the buckets in the
+hash.
 
 =cut
 */
@@ -2022,7 +2037,6 @@ Perl_hv_fill(pTHX_ HV *const hv)
 {
     STRLEN count = 0;
     HE **ents = HvARRAY(hv);
-    struct xpvhv_aux *aux = SvOOK(hv) ? HvAUX(hv) : NULL;
 
     PERL_ARGS_ASSERT_HV_FILL;
 
@@ -2031,12 +2045,12 @@ Perl_hv_fill(pTHX_ HV *const hv)
     if (HvTOTALKEYS(hv) < 2)
         return HvTOTALKEYS(hv);
 
-#ifndef DEBUGGING
-    if (aux && aux->xhv_fill_lazy)
-        return aux->xhv_fill_lazy;
-#endif
-
     if (ents) {
+        /* I wonder why we count down here...
+         * Is it some micro-optimisation?
+         * I would have thought counting up was better.
+         * - Yves
+         */
 	HE *const *const last = ents + HvMAX(hv);
 	count = last + 1 - ents;
 
@@ -2045,16 +2059,6 @@ Perl_hv_fill(pTHX_ HV *const hv)
 		--count;
 	} while (++ents <= last);
     }
-    if (aux) {
-#ifdef DEBUGGING
-        if (aux->xhv_fill_lazy)
-            assert(aux->xhv_fill_lazy == count);
-#endif
-        aux->xhv_fill_lazy = count;
-    } else if (HvMAX(hv) >= HV_FILL_THRESHOLD) {
-        aux = hv_auxinit(hv);
-        aux->xhv_fill_lazy = count;
-    }        
     return count;
 }
 
@@ -2099,7 +2103,6 @@ S_hv_auxinit_internal(struct xpvhv_aux *iter) {
 #ifdef PERL_HASH_RANDOMIZE_KEYS
     iter->xhv_last_rand = iter->xhv_rand;
 #endif
-    iter->xhv_fill_lazy = 0;
     iter->xhv_name_u.xhvnameu_name = 0;
     iter->xhv_name_count = 0;
     iter->xhv_backreferences = 0;
@@ -2181,7 +2184,7 @@ Perl_hv_iterinit(pTHX_ HV *hv)
 	hv_auxinit(hv);
     }
 
-    /* used to be xhv->xhv_fill before 5.004_65 */
+    /* note this includes placeholders! */
     return HvTOTALKEYS(hv);
 }
 
diff --git a/hv.h b/hv.h
index b97b22425e..0e773f2020 100644
--- a/hv.h
+++ b/hv.h
@@ -119,7 +119,6 @@ struct xpvhv_aux {
     U32         xhv_last_rand;  /* last random value for hash traversal,
                                    used to detect each() after insert for warnings */
 #endif
-    U32         xhv_fill_lazy;
     U32         xhv_aux_flags;      /* assorted extra flags */
 };
 
diff --git a/pod/perldata.pod b/pod/perldata.pod
index 66bb206133..0ff6534572 100644
--- a/pod/perldata.pod
+++ b/pod/perldata.pod
@@ -400,17 +400,24 @@ leave nothing to doubt:
     $element_count = scalar(@whatever);
 
 If you evaluate a hash in scalar context, it returns false if the
-hash is empty.  If there are any key/value pairs, it returns true;
-more precisely, the value returned is a string consisting of the
+hash is empty.  If there are any key/value pairs, it returns true.
+A more precise definition is version dependent.
+
+Prior to Perl 5.25 the value returned was a string consisting of the
 number of used buckets and the number of allocated buckets, separated
 by a slash.  This is pretty much useful only to find out whether
 Perl's internal hashing algorithm is performing poorly on your data
 set.  For example, you stick 10,000 things in a hash, but evaluating
 %HASH in scalar context reveals C<"1/16">, which means only one out
 of sixteen buckets has been touched, and presumably contains all
-10,000 of your items.  This isn't supposed to happen.  If a tied hash
-is evaluated in scalar context, the C<SCALAR> method is called (with a
-fallback to C<FIRSTKEY>).
+10,000 of your items.  This isn't supposed to happen.
+
+As of Perl 5.25 the return was changed to be the count of keys in the
+hash. If you need access to the old behavior you can use
+C<Hash::Util::bucket_ratio()> instead.
+
+If a tied hash is evaluated in scalar context, the C<SCALAR> method is
+called (with a fallback to C<FIRSTKEY>).
 X<hash, scalar context> X<hash, bucket> X<bucket>
 
 You can preallocate space for a hash by assigning to the keys() function.
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 7c05104f42..53d839ae6d 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -100,6 +100,16 @@ XXX
 
 =back
 
+=head2 C<scalar(%hash)> return signature changed
+
+The value returned for C<scalar(%hash)> will no longer show information
+about the buckets allocated in the hash. It will simply return the count
+of used keys. It is thus equivalent to C<0+keys(%hash)>.
+
+A form of backwards compatibility is provided via C<Hash::Util::bucket_ratio()>
+which provides the same behavior as scalar(%hash) provided prior to
+Perl 5.25.
+
 =head1 Modules and Pragmata
 
 XXX All changes to installed files in F<cpan/>, F<dist/>, F<ext/> and F<lib/>
diff --git a/pod/perltie.pod b/pod/perltie.pod
index 7b89f570ad..87a21268ed 100644
--- a/pod/perltie.pod
+++ b/pod/perltie.pod
@@ -828,6 +828,11 @@ referenced by C<$self-E<gt>{LIST}>:
 	return scalar %{ $self->{LIST} }
     }
 
+NOTE: In perl 5.25 the behavior of scalar %hash on an untied hash changed
+to return the count of keys. Prior to this it returned a string containing
+information about the bucket setup of the hash. See
+L<Hash::Util/bucket_ratio> for a backwards compatibility path.
+
 =item UNTIE this
 X<UNTIE>
 
@@ -1196,10 +1201,11 @@ modules L<Tie::Scalar>, L<Tie::Array>, L<Tie::Hash>, or L<Tie::Handle>.
 
 =head1 BUGS
 
-The bucket usage information provided by C<scalar(%hash)> is not
+The normal return provided by C<scalar(%hash)> is not
 available.  What this means is that using %tied_hash in boolean
 context doesn't work right (currently this always tests false,
 regardless of whether the hash is empty or hash elements).
+[ This paragraph needs review in light of changes in 5.25 ]
 
 Localizing tied arrays or hashes does not work.  After exiting the
 scope the arrays or the hashes are not restored.
diff --git a/proto.h b/proto.h
index 369da2c766..86e9480a58 100644
--- a/proto.h
+++ b/proto.h
@@ -1117,6 +1117,12 @@ PERL_CALLCONV void	Perl_gv_try_downgrade(pTHX_ GV* gv);
 PERL_CALLCONV AV**	Perl_hv_backreferences_p(pTHX_ HV *hv);
 #define PERL_ARGS_ASSERT_HV_BACKREFERENCES_P	\
 	assert(hv)
+PERL_CALLCONV SV*	Perl_hv_bucket_ratio(pTHX_ HV *hv)
+			__attribute__deprecated__
+			__attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_HV_BUCKET_RATIO	\
+	assert(hv)
+
 PERL_CALLCONV void	Perl_hv_clear(pTHX_ HV *hv);
 PERL_CALLCONV void	Perl_hv_clear_placeholders(pTHX_ HV *hv);
 #define PERL_ARGS_ASSERT_HV_CLEAR_PLACEHOLDERS	\
diff --git a/sv.c b/sv.c
index b0fdd153c0..cbdb28e92f 100644
--- a/sv.c
+++ b/sv.c
@@ -13860,7 +13860,6 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
 			}
 			daux->xhv_name_count = saux->xhv_name_count;
 
-			daux->xhv_fill_lazy = saux->xhv_fill_lazy;
 			daux->xhv_aux_flags = saux->xhv_aux_flags;
 #ifdef PERL_HASH_RANDOMIZE_KEYS
 			daux->xhv_rand = saux->xhv_rand;
diff --git a/t/op/coreamp.t b/t/op/coreamp.t
index e35f4f3f34..cca23f36b5 100644
--- a/t/op/coreamp.t
+++ b/t/op/coreamp.t
@@ -634,7 +634,7 @@ lis [&mykeys([ 1..4 ])], [0..3], '&mykeys(\@array) in list cx';
 {
   my %h = 1..2;
   &mykeys(\%h) = 1024;
-  like %h, qr|/1024\z|, '&mykeys = ...';
+  like Hash::Util::bucket_ratio(%h), qr|/1024\z|, '&mykeys = changed number of buckets allocated';
   eval { (&mykeys(\%h)) = 1025; };
   like $@, qr/^Can't modify keys in list assignment at /;
 }
diff --git a/t/op/each.t b/t/op/each.t
index b33fbac6ce..0d342a2f30 100644
--- a/t/op/each.t
+++ b/t/op/each.t
@@ -60,19 +60,19 @@ is ($i, 30, "each count");
 @keys = ('blurfl', keys(%h), 'dyick');
 is ($#keys, 31, "added a key");
 
-$size = ((split('/',scalar %h))[1]);
+$size = Hash::Util::num_buckets(%h);
 keys %h = $size * 5;
-$newsize = ((split('/',scalar %h))[1]);
+$newsize = Hash::Util::num_buckets(%h);
 is ($newsize, $size * 8, "resize");
 keys %h = 1;
-$size = ((split('/',scalar %h))[1]);
+$size = Hash::Util::num_buckets(%h);
 is ($size, $newsize, "same size");
 %h = (1,1);
-$size = ((split('/',scalar %h))[1]);
+$size = Hash::Util::num_buckets(%h);
 is ($size, $newsize, "still same size");
 undef %h;
 %h = (1,1);
-$size = ((split('/',scalar %h))[1]);
+$size = Hash::Util::num_buckets(%h);
 is ($size, 8, "size 8");
 
 # test scalar each
@@ -98,11 +98,13 @@ $total = 0;
 $total += $key while $key = each %hash;
 is ($total, 100, "test values keys resets iterator");
 
-$size = (split('/', scalar %hash))[1];
+$size = Hash::Util::num_buckets(%hash);
 keys(%hash) = $size / 2;
-is ($size, (split('/', scalar %hash))[1]);
+is ($size, Hash::Util::num_buckets(%hash),
+    "assign to keys does not shrink hash bucket array");
 keys(%hash) = $size + 100;
-isnt ($size, (split('/', scalar %hash))[1]);
+isnt ($size, Hash::Util::num_buckets(%hash),
+    "assignment to keys of a number not large enough does not change size");
 
 is (keys(%hash), 10, "keys (%hash)");
 
@@ -191,14 +193,14 @@ for my $k (qw(each keys values)) {
     my ($k2,$v2)=each(%foo);
     my $rest=0;
     while (each(%foo)) {$rest++};
-    is($yes,1,"if(%foo) was true");
-    isnt($k1,$k2,"if(%foo) didnt mess with each (key)");
-    isnt($v1,$v2,"if(%foo) didnt mess with each (value)");
-    is($rest,3,"Got the expect number of keys");
+    is($yes,1,"if(%foo) was true - my");
+    isnt($k1,$k2,"if(%foo) didnt mess with each (key) - my");
+    isnt($v1,$v2,"if(%foo) didnt mess with each (value) - my");
+    is($rest,3,"Got the expected number of keys - my");
     my $hsv=1 && %foo;
-    like($hsv,qr[/],"Got bucket stats from %foo in scalar assignment context");
+    is($hsv,$count,"Got the count of keys from %foo in scalar assignment context - my");
     my @arr=%foo&&%foo;
-    is(@arr,10,"Got expected number of elements in list context");
+    is(@arr,10,"Got expected number of elements in list context - my");
 }    
 {
     our %foo=(1..10);
@@ -210,14 +212,14 @@ for my $k (qw(each keys values)) {
     my ($k2,$v2)=each(%foo);
     my $rest=0;
     while (each(%foo)) {$rest++};
-    is($yes,1,"if(%foo) was true");
-    isnt($k1,$k2,"if(%foo) didnt mess with each (key)");
-    isnt($v1,$v2,"if(%foo) didnt mess with each (value)");
-    is($rest,3,"Got the expect number of keys");
+    is($yes,1,"if(%foo) was true - our");
+    isnt($k1,$k2,"if(%foo) didnt mess with each (key) - our");
+    isnt($v1,$v2,"if(%foo) didnt mess with each (value) - our");
+    is($rest,3,"Got the expected number of keys - our");
     my $hsv=1 && %foo;
-    like($hsv,qr[/],"Got bucket stats from %foo in scalar assignment context");
+    is($hsv,$count,"Got the count of keys from %foo in scalar assignment context - our");
     my @arr=%foo&&%foo;
-    is(@arr,10,"Got expected number of elements in list context");
+    is(@arr,10,"Got expected number of elements in list context - our");
 }    
 {
     # make sure a deleted active iterator gets freed timely, even if the
diff --git a/t/op/hash.t b/t/op/hash.t
index b4d6c2585f..3c083e0281 100644
--- a/t/op/hash.t
+++ b/t/op/hash.t
@@ -127,10 +127,19 @@ sub validate_hash {
   my ($desc, $h) = @_;
   local $::Level = $::Level + 1;
 
-  my $scalar = %$h;
+  # test that scalar(%hash) works as expected, which as of perl 5.25 is
+  # the same as 0+keys %hash;
+  my $scalar= scalar %$h;
+  my $count= 0+keys %$h;
+
+  is($scalar, $count, "$desc scalar() should be the same as 0+keys() as of perl 5.25");
+
+  # back compat tests, via Hash::Util::bucket_ratio();
+  my $ratio = Hash::Util::bucket_ratio(%$h);
   my $expect = qr!\A(\d+)/(\d+)\z!;
-  like($scalar, $expect, "$desc in scalar context matches pattern");
-  my ($used, $total) = $scalar =~ $expect;
+  like($ratio, $expect, "$desc bucket_ratio matches pattern");
+  my ($used, $total)= (0,0);
+  ($used, $total)= ($1,$2) if $ratio =~ /$expect/;
   cmp_ok($total, '>', 0, "$desc has >0 array size ($total)");
   cmp_ok($used, '>', 0, "$desc uses >0 heads ($used)");
   cmp_ok($used, '<=', $total,
diff --git a/t/op/sub_lval.t b/t/op/sub_lval.t
index dd0805fb34..eb33027ea5 100644
--- a/t/op/sub_lval.t
+++ b/t/op/sub_lval.t
@@ -552,7 +552,7 @@ is("@p", "1 8");
 sub keeze : lvalue { keys %__ }
 %__ = ("a","b");
 keeze = 64;
-is scalar %__, '1/64', 'keys assignment through lvalue sub';
+is Hash::Util::bucket_ratio(%__), '1/64', 'keys assignment through lvalue sub';
 eval { (keeze) = 64 };
 like $@, qr/^Can't modify keys in list assignment at /,
   'list assignment to keys through lv sub is forbidden';
diff --git a/universal.c b/universal.c
index 31a53cc0b5..0fcaea7701 100644
--- a/universal.c
+++ b/universal.c
@@ -766,6 +766,67 @@ XS(XS_PerlIO_get_layers)
     XSRETURN(0);
 }
 
+XS(XS_hash_util_bucket_ratio); /* prototype to pass -Wmissing-prototypes */
+XS(XS_hash_util_bucket_ratio)
+{
+    dXSARGS;
+    SV *rhv;
+    PERL_UNUSED_VAR(cv);
+
+    if (items != 1)
+        croak_xs_usage(cv, "hv");
+
+    rhv= ST(0);
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            SV *ret= Perl_hv_bucket_ratio(aTHX_ (HV*)rhv);
+            ST(0)= ret;
+            XSRETURN(1);
+        }
+    }
+    XSRETURN_UNDEF;
+}
+
+XS(XS_hash_util_num_buckets); /* prototype to pass -Wmissing-prototypes */
+XS(XS_hash_util_num_buckets)
+{
+    dXSARGS;
+    SV *rhv;
+    PERL_UNUSED_VAR(cv);
+
+    if (items != 1)
+        croak_xs_usage(cv, "hv");
+
+    rhv= ST(0);
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            XSRETURN_UV(HvMAX((HV*)rhv)+1);
+        }
+    }
+    XSRETURN_UNDEF;
+}
+
+XS(XS_hash_util_used_buckets); /* prototype to pass -Wmissing-prototypes */
+XS(XS_hash_util_used_buckets)
+{
+    dXSARGS;
+    SV *rhv;
+    PERL_UNUSED_VAR(cv);
+
+    if (items != 1)
+        croak_xs_usage(cv, "hv");
+
+    rhv= ST(0);
+    if (SvROK(rhv)) {
+        rhv= SvRV(rhv);
+        if ( SvTYPE(rhv)==SVt_PVHV ) {
+            XSRETURN_UV(HvFILL((HV*)rhv));
+        }
+    }
+    XSRETURN_UNDEF;
+}
 
 XS(XS_re_is_regexp); /* prototype to pass -Wmissing-prototypes */
 XS(XS_re_is_regexp)
@@ -1023,6 +1084,9 @@ static const struct xsub_details details[] = {
     {"Internals::SvREFCNT", XS_Internals_SvREFCNT, "\\[$%@];$"},
     {"Internals::hv_clear_placeholders", XS_Internals_hv_clear_placehold, "\\%"},
     {"PerlIO::get_layers", XS_PerlIO_get_layers, "*;@"},
+    {"Hash::Util::bucket_ratio", XS_hash_util_bucket_ratio, "\\%"},
+    {"Hash::Util::num_buckets", XS_hash_util_num_buckets, "\\%"},
+    {"Hash::Util::used_buckets", XS_hash_util_used_buckets, "\\%"},
     {"re::is_regexp", XS_re_is_regexp, "$"},
     {"re::regname", XS_re_regname, ";$$"},
     {"re::regnames", XS_re_regnames, ";$"},
author	Yves Orton <demerphq@gmail.com>	2016-06-20 22:51:38 +0200
committer	Yves Orton <demerphq@gmail.com>	2016-06-22 18:21:32 +0200
commit	8bf4c4010cc474d4000c2a8c78f6890fa5f1e577 (patch)
tree	b12d25aa70138f2dbc13bf1bb49a93fab7e7a4b7
parent	6c50b67b99a3df9486896d14dc294825a148a673 (diff)
download	perl-8bf4c4010cc474d4000c2a8c78f6890fa5f1e577.tar.gz