summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2016-02-21 20:03:16 +0200
committerArnold D. Robbins <arnold@skeeve.com>2017-01-19 06:09:04 +0200
commitbaadccc7297fa9a0cd1bcc276385872fa0ca8b6e (patch)
tree4853e6e0ef0a4dc2cd0edca52de3ea57ba79b39e
parent35c461c3b2c9cc56e22a5360c36b5e6dc9fccd28 (diff)
downloadgawk-baadccc7297fa9a0cd1bcc276385872fa0ca8b6e.tar.gz
Improve random number generator's period using a shuffle buffer.
-rw-r--r--ChangeLog9
-rw-r--r--support/random.c104
-rw-r--r--test/ChangeLog4
-rw-r--r--test/rand.ok2
4 files changed, 116 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 3650a30a..62b9e858 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1407,6 +1407,15 @@
* configure.ac (AM_GNU_GETTEXT_VERSION): Bump to 0.19.7.
+2016-02-21 Nelson H.F. Beebe <beebe@math.utah.edu>
+
+ * random.c [SHUFFLE_BITS, SHUFFLE_MAX, SHUFFLE_MASK]: New macros.
+ (shuffle_init, shuffle_buffer): New static variables.
+ (random_old): Renamed from random.
+ (random): New function wrapping random_old and providing a
+ shuffle buffer to increase the period. See the literature citations
+ and other notes in the code.
+
2016-02-21 Arnold D. Robbins <arnold@skeeve.com>
* regexec.c (prune_impossible_nodes): Remove attribute that
diff --git a/support/random.c b/support/random.c
index cba1b6bc..1aab2b50 100644
--- a/support/random.c
+++ b/support/random.c
@@ -60,6 +60,8 @@ static const char sccsid[] = "@(#)random.c 8.2 (Berkeley) 5/19/95";
#include <unistd.h>
#endif
+#include <assert.h>
+
#include "random.h" /* gawk addition */
#ifdef HAVE_SYS_TIME_H /* gawk addition */
@@ -137,8 +139,20 @@ __FBSDID("$FreeBSD: /repoman/r/ncvs/src/lib/libc/stdlib/random.c,v 1.24 2004/01/
* 32 bytes, a simple linear congruential R.N.G. is used." For a buffer of
* 128 bytes, this new version runs about 19 percent faster and for a 16
* byte buffer it is about 5 percent faster.
+ *
+ * Modified 06 February 2016 by Nelson H. F. Beebe to interface to a
+ * shuffle buffer, producing a huge period, and removing long-range
+ * correlations of the basic low-level generator. See comments and
+ * literature references in random() at the end of this file.
*/
+#define SHUFFLE_BITS 9 /* see comments in random() below for this choice */
+#define SHUFFLE_MAX (1 << SHUFFLE_BITS) /* MUST be power of two */
+#define SHUFFLE_MASK (SHUFFLE_MAX - 1) /* (k & SHUFFLE_MASK) is in [0, SHUFFLE_MAX - 1] */
+
+static int shuffle_init = 1;
+static long shuffle_buffer[SHUFFLE_MAX];
+
/*
* For each of the currently supported random number generators, we have a
* break value on the amount of state information (you need at least this
@@ -307,6 +321,8 @@ srandom(x)
{
int i, lim;
+ shuffle_init = 1;
+
state[0] = (uint32_t)x;
if (rand_type == TYPE_0)
lim = NSHUFF;
@@ -512,8 +528,8 @@ setstate(arg_state)
*
* Returns a 31-bit random number.
*/
-long
-random()
+static long
+random_old()
{
uint32_t i;
uint32_t *f, *r;
@@ -540,3 +556,87 @@ random()
}
return((long)i);
}
+
+long
+random()
+{
+ /*
+ * This function is a wrapper to the original random(), now renamed
+ * random_old(), to interpose a shuffle buffer to dramatically extend
+ * the generator period at nearly zero additional execution cost,
+ * and an additional storage cost set by the size of the
+ * shuffle buffer (default: 512 longs, or 2K or 4K bytes).
+ * The algorithm was first described in
+ *
+ * Carter Bays and S. D. Durham
+ * Improving a Poor Random Number Generator
+ * ACM Transactions on Mathematical Software (TOMS) 2(1) 59--64 (March 1976)
+ * http://dx.doi.org/10.1145/355666.355670
+ *
+ * and later revisited in
+ *
+ * Carter Bays
+ * C364. Improving a random number generator: a comparison between two shuffling methods
+ * Journal of Statistical Computation and Simulation 36(1) 57--59 (May 1990)
+ * http://dx.doi.org/10.1080/00949659008811264
+ *
+ * The second paper is critically important because it
+ * emphasizes how an apparently trivial change to the final
+ * element selection can destroy the period-lengthening
+ * feature of the original shuffle algorithm.
+ *
+ * Here is a table of the increase in period size for a
+ * shuffle generator using 32-bit and 64-bit unsigned integer
+ * linear congruential generators, which are known to have
+ * significant correlations, and are thus inadvisable for
+ * serious work with random numbers:
+ *
+ * hocd128> for (n = 32; n < 4096; n *= 2) \
+ * printf("%7d\t%12.3.4e\t%12.3.4e\n",
+ * n, \
+ * sqrt(PI * gamma(n + 1)/(2**32 - 1)) / (2**32 - 1), \\
+ * sqrt(PI * gamma(n + 1)/(2**64 - 1)) / (2**64 - 1))
+ *
+ * 32 3.230e+03 1.148e-11
+ * 64 2.243e+30 7.969e+15
+ * 128 3.910e+93 1.389e+79
+ * 256 1.844e+239 6.552e+224
+ * 512 1.174e+569 4.172e+554
+ * 1024 4.635e+1305 1.647e+1291
+ * 2048 8.144e+2932 2.893e+2918
+ *
+ * A generator giving one result per nanosecond would produce
+ * about 3.16e16 random numbers per year, so even for
+ * massively parallel operations with, say, one million CPU
+ * cores, it could not produce more than 10**23 values per
+ * year. The main benefit of an enormous period is that it
+ * makes long-range correlations vanishingly unlikely, even
+ * when starting seeds are similar (e.g., seeds of 0, 1, 2,
+ * ...), and therefore makes possible families of generators
+ * (needed in parallel computations) where the probability of
+ * sequence overlap between family members is essentially
+ * zero.
+ */
+
+ int k;
+ long r;
+ static long s = 0xcafefeedL;
+
+ if (shuffle_init) { /* first time, or seed changed by srand() */
+ for (k = 0; k < SHUFFLE_MAX; k++)
+ shuffle_buffer[k] = random_old();
+
+ s = random_old();
+ shuffle_init = 0;
+ }
+
+ r = random_old();
+ k = s & SHUFFLE_MASK; /* random index into shuffle_buffer[] */
+
+ assert(0L <= k && k < SHUFFLE_MAX);
+
+ s = shuffle_buffer[k];
+ shuffle_buffer[k] = r;
+
+ return (s);
+}
diff --git a/test/ChangeLog b/test/ChangeLog
index 5122534c..1b2d4022 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -306,6 +306,10 @@
* Makefile.am (profile10): New test.
* profile10.awk, profile10.ok: New files.
+2016-02-21 Nelson H.F. Beebe <beebe@math.utah.edu>
+
+ * rand.ok: Updated after code change.
+
2016-02-18 Arnold D. Robbins <arnold@skeeve.com>
* profile2.ok, profile5.ok: Adjust after code changes.
diff --git a/test/rand.ok b/test/rand.ok
index 1df4ba39..4ec90dd6 100644
--- a/test/rand.ok
+++ b/test/rand.ok
@@ -1 +1 @@
- 67 6 77 68 96 26 8 93 53 74 53 95 78 74 96 77 33 58 91
+ 90 23 5 64 38 61 72 5 82 80 33 71 33 34 55 60 90 88 7