summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Guy <mjtg@cam.ac.uk>2001-04-06 13:47:06 +0100
committerJarkko Hietaniemi <jhi@iki.fi>2001-04-07 17:26:45 +0000
commitb8fa94d833cd9fcbc2e9db4b24f4b6f366a204a4 (patch)
treedd623e1786ae87d3b7f77bff50c1375b887dc580
parentd468ca0452c17d0537a54afdfb4073bba9b1b292 (diff)
downloadperl-b8fa94d833cd9fcbc2e9db4b24f4b6f366a204a4.tar.gz
Re: [PATCH dump.c] hash quality
Message-Id: <E14lUhm-0000rx-00@libra.cus.cam.ac.uk> p4raw-id: //depot/perl@9612
-rw-r--r--dump.c15
-rw-r--r--t/lib/peek.t2
2 files changed, 13 insertions, 4 deletions
diff --git a/dump.c b/dump.c
index 9010bc516e..9d2595f6a9 100644
--- a/dump.c
+++ b/dump.c
@@ -1044,15 +1044,24 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
}
}
PerlIO_putc(file, ')');
- /* Now calculate quality wrt theoretical value */
+ /* The "quality" of a hash is defined as the total number of
+ comparisons needed to access every element once, relative
+ to the expected number needed for a random hash.
+
+ The total number of comparisons is equal to the sum of
+ the squares of the number of entries in each backet.
+ For a random hash of n keys into k backets, the expected
+ value is
+ n + n(n-1)/2k
+ */
+
for (i = max; i > 0; i--) { /* Precision: count down. */
sum += freq[i] * i * i;
}
while ((keys = keys >> 1))
pow2 = pow2 << 1;
- /* Approximate by Poisson distribution */
theoret = HvKEYS(sv);
- theoret += theoret * theoret/pow2;
+ theoret += theoret * (theoret-1)/pow2;
PerlIO_putc(file, '\n');
Perl_dump_indent(aTHX_ level, file, " hash quality = %.1"NVff"%%", theoret/sum*100);
}
diff --git a/t/lib/peek.t b/t/lib/peek.t
index 7bf1793acb..96e24a2e4f 100644
--- a/t/lib/peek.t
+++ b/t/lib/peek.t
@@ -171,7 +171,7 @@ do_test(12,
IV = 1
NV = 0
ARRAY = $ADDR \\(0:7, 1:1\\)
- hash quality = 150.0%
+ hash quality = 100.0%
KEYS = 1
FILL = 1
MAX = 7