diff options
author | Linus Nordberg <linus@nordberg.se> | 1999-03-16 11:30:46 +0100 |
---|---|---|
committer | Linus Nordberg <linus@nordberg.se> | 1999-03-16 11:30:46 +0100 |
commit | 6b271ada539875ce6a09a4f50a64bd9785a2a6de (patch) | |
tree | 266be964a407bcbce55bb8a0087e959e4c7ac485 | |
parent | 9d88b9a05385660699543e5c64b7562ee36d6034 (diff) | |
download | gmp-6b271ada539875ce6a09a4f50a64bd9785a2a6de.tar.gz |
New randomization functions.
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | gmp.h | 49 | ||||
-rw-r--r-- | mpf/Makefile.in | 5 | ||||
-rw-r--r-- | mpf/urandom.c | 61 | ||||
-rw-r--r-- | mpz/Makefile.in | 7 | ||||
-rw-r--r-- | mpz/urandom.c | 270 | ||||
-rw-r--r-- | tests/rand/ChangeLog | 5 | ||||
-rw-r--r-- | tests/rand/Makefile | 28 | ||||
-rw-r--r-- | tests/rand/gen.c | 289 | ||||
-rw-r--r-- | tests/rand/stat.c | 377 | ||||
-rw-r--r-- | tests/rand/statlib.c | 338 | ||||
-rw-r--r-- | tests/rand/statlib.h | 31 |
12 files changed, 1475 insertions, 4 deletions
@@ -1,3 +1,22 @@ +1999-03-15 Linus Nordberg <linus.nordberg@canit.se> + + * mpz/urandom.c (gmp_rand_init): New function. + (gmp_rand_clear): New function. + (mpz_urandomb): New function. + + * mpz/Makefile.in: Compile urandom.c + + * mpf/urandom.c (mpf_urandomb): New function. + + * mpf/Makefile.in: Compile urandom.c. + + * gmp.h (__gmp_rand_state_struct, __gmp_rand_scheme_struct): New + structs for randomization functions. + (gmp_rand_dist, gmp_rand_alogrithm): New enums for randomization + functions. + (mpz_urandomb, mpf_urandomb): Add prototype. + (gmp_rand_init, gmp_rand_clear): Add prototype. + 1999-03-15 Torbjorn Granlund <tege@matematik.su.se> * .gdbinit: New file. @@ -117,6 +117,45 @@ typedef struct /* typedef __mpf_struct MP_FLOAT; */ typedef __mpf_struct mpf_t[1]; +/* Algorithm used by random functions. */ +typedef enum +{ + GMP_RAND_ALG_DEFAULT = 0, + GMP_RAND_ALG_LC = GMP_RAND_ALG_DEFAULT, /* Linear congruental. */ + GMP_RAND_ALG_BBS, /* Blum, Blum, and Shub. */ +} gmp_rand_algorithm; + +/* Distribution types for random functions. */ +/* FIXME: Or should we do the distribution distinction by using + separate functions? */ +typedef enum +{ + GMP_RAND_DIST_DEFAULT = 0, + GMP_RAND_DIST_UNIVERSAL = GMP_RAND_DIST_DEFAULT, +} __gmp_rand_dist; + +typedef struct +{ + unsigned int bits; /* Generate 0 <= Z <= 2^bits - 1 */ + char *astr; /* Multiplier in string form. */ + mpz_t a; /* Multiplier. */ + unsigned long int c; /* Adder. */ + char *mstr; /* Modulus in string form. */ + mpz_t m; /* Modulus. */ +} __gmp_rand_scheme_struct; + +typedef struct +{ + gmp_rand_algorithm alg; /* Algorithm used. */ + mpz_t seed; /* Current seed. */ + unsigned int size; /* Requested size of result in bits. */ + mpz_t maxval; /* Largest generated number (plus one). */ + mpz_t n; /* Number of generated numbers left + before changing scheme. */ + __gmp_rand_scheme_struct *scheme; /* Current scheme. */ +} __gmp_rand_state_struct; + +typedef __gmp_rand_state_struct gmp_rand_state; /* FIXME: typedef as [1]? */ /* Types for function declarations in gmp files. */ /* ??? Should not pollute user name space with these ??? */ @@ -154,6 +193,12 @@ void mp_set_memory_functions _PROTO ((void *(*) (size_t), void (*) (void *, size_t))); extern __gmp_const int mp_bits_per_limb; +int gmp_rand_init _PROTO ((gmp_rand_state *s, + gmp_rand_algorithm alg, + unsigned long int size, + mpz_t seed)); +void gmp_rand_clear _PROTO ((gmp_rand_state *s)); + /**************** Integer (i.e. Z) routines. ****************/ #define _mpz_realloc __gmpz_realloc @@ -264,6 +309,7 @@ extern __gmp_const int mp_bits_per_limb; #define mpz_tdiv_r_ui __gmpz_tdiv_r_ui #define mpz_tstbit __gmpz_tstbit #define mpz_ui_pow_ui __gmpz_ui_pow_ui +#define mpz_urandomb __gmpz_urandomb #define mpz_xor __gmpz_xor #define mpz_eor __gmpz_xor @@ -382,6 +428,7 @@ void mpz_tdiv_r_2exp _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); unsigned long int mpz_tdiv_r_ui _PROTO ((mpz_ptr, mpz_srcptr, unsigned long int)); int mpz_tstbit _PROTO ((mpz_srcptr, unsigned long int)); void mpz_ui_pow_ui _PROTO ((mpz_ptr, unsigned long int, unsigned long int)); +void mpz_urandomb _PROTO ((mpz_t rop, gmp_rand_state *s)); void mpz_xor _PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)); #if defined (__cplusplus) } @@ -492,6 +539,7 @@ void mpq_canonicalize _PROTO ((mpq_ptr)); #define mpf_trunc __gmpf_trunc #define mpf_ui_div __gmpf_ui_div #define mpf_ui_sub __gmpf_ui_sub +#define mpf_urandomb __gmpf_urandomb #if defined (__cplusplus) extern "C" { @@ -551,6 +599,7 @@ void mpf_sub_ui _PROTO ((mpf_ptr, mpf_srcptr, unsigned long int)); void mpf_trunc _PROTO ((mpf_ptr, mpf_srcptr)); void mpf_ui_div _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); void mpf_ui_sub _PROTO ((mpf_ptr, unsigned long int, mpf_srcptr)); +void mpf_urandomb _PROTO ((mpf_t rop, gmp_rand_state *s)); #if defined (__cplusplus) } #endif diff --git a/mpf/Makefile.in b/mpf/Makefile.in index ae9a580a6..8c0c385bf 100644 --- a/mpf/Makefile.in +++ b/mpf/Makefile.in @@ -36,7 +36,7 @@ MPF_SRCS = init.c init2.c set.c set_ui.c set_si.c set_str.c set_d.c set_z.c \ add.c add_ui.c sub.c sub_ui.c ui_sub.c mul.c mul_ui.c div.c div_ui.c \ cmp.c cmp_ui.c cmp_si.c mul_2exp.c div_2exp.c abs.c neg.c set_q.c get_d.c \ set_dfl_prec.c set_prc.c set_prc_raw.c get_prc.c ui_div.c sqrt_ui.c \ - integer.c pow_ui.c + integer.c pow_ui.c urandom.c MPF_OBJS = init.o init2.o \ set.o set_ui.o set_si.o set_str.o set_d.o set_z.o set_q.o \ iset.o iset_ui.o iset_si.o iset_str.o iset_d.o clear.o get_str.o \ @@ -44,7 +44,7 @@ MPF_OBJS = init.o init2.o \ add.o add_ui.o sub.o sub_ui.o ui_sub.o mul.o mul_ui.o div.o div_ui.o \ cmp.o cmp_ui.o cmp_si.o mul_2exp.o div_2exp.o abs.o neg.o get_d.o \ set_dfl_prec.o set_prc.o set_prc_raw.o get_prc.o ui_div.o sqrt_ui.o \ - floor.o ceil.o trunc.o pow_ui.o + floor.o ceil.o trunc.o pow_ui.o urandom.o LATER_OBJS = inp_raw.o out_raw.o random.o fac_ui.o @@ -129,3 +129,4 @@ sub.o: $(srcdir)/sub.c $(H) sub_ui.o: $(srcdir)/sub_ui.c $(H) ui_div.o: $(srcdir)/ui_div.c $(H) $(srcdir)/../longlong.h ui_sub.o: $(srcdir)/ui_sub.c $(H) +urandom.o: $(srcdir)/urandom.c $(H) diff --git a/mpf/urandom.c b/mpf/urandom.c new file mode 100644 index 000000000..6fb929a5d --- /dev/null +++ b/mpf/urandom.c @@ -0,0 +1,61 @@ +/* urandomb(rop, state) -- Generate a uniform pseudorandom real number +between zero and one, using `state' as the random state previously +initialized by a call to gmp_rand_init(). + +Copyright (C) 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" + +/* mpf_urandomb() -- Generate a universally distributed real random +number 0 <= X < 1. See file mpz/urandom.c for algorithms used. */ + +void +#if __STDC__ +mpf_urandomb (mpf_t rop, gmp_rand_state *s) +#else +mpf_urandomb (rop, s) + mpf_t rop; + gmp_rand_state *s; +#endif +{ + mpz_t z_rand; + mpf_t f_tmp; + + switch (s->alg) + { + case GMP_RAND_ALG_LC: + mpz_init (z_rand); + mpf_init (f_tmp); + + /* FIXME: mpz_urandomb() will most often do a division and a + multiplication in order to produce 0 <= z < 2^size-1. We + only need the division. Break up mpz_urandomb(). */ + + mpz_urandomb (z_rand, s); + mpf_set_z (rop, z_rand); + mpf_set_z (f_tmp, s->scheme->m); + mpf_div (rop, rop, f_tmp); + + mpz_clear (z_rand); + mpf_clear (f_tmp); + + break; + } +} diff --git a/mpz/Makefile.in b/mpz/Makefile.in index 0da5b492e..a55f26ff3 100644 --- a/mpz/Makefile.in +++ b/mpz/Makefile.in @@ -45,7 +45,8 @@ MPZ_SRCS = abs.c add.c add_ui.c addmul_ui.c and.c array_init.c \ scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \ set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c \ sub_ui.c swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \ - tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c xor.c + tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \ + urandom.c xor.c MPZ_OBJS = abs.o add.o add_ui.o addmul_ui.o and.o array_init.o \ bin_ui.o bin_uiui.o cdiv_q.o \ cdiv_q_ui.o cdiv_qr.o cdiv_qr_ui.o cdiv_r.o cdiv_r_ui.o \ @@ -62,7 +63,8 @@ MPZ_OBJS = abs.o add.o add_ui.o addmul_ui.o and.o array_init.o \ scan1.o set.o set_d.o set_f.o set_q.o set_si.o set_str.o \ set_ui.o setbit.o size.o sizeinbase.o sqrt.o sqrtrem.o sub.o \ sub_ui.o swap.o tdiv_ui.o tdiv_q.o tdiv_q_2exp.o tdiv_q_ui.o tdiv_qr.o \ - tdiv_qr_ui.o tdiv_r.o tdiv_r_2exp.o tdiv_r_ui.o tstbit.o ui_pow_ui.o xor.o + tdiv_qr_ui.o tdiv_r.o tdiv_r_2exp.o tdiv_r_ui.o tstbit.o ui_pow_ui.o \ + urandom.o xor.o INCLUDES = -I. -I.. -I../mpn -I$(srcdir)/.. -I$(srcdir) @@ -190,4 +192,5 @@ tdiv_r.o: $(srcdir)/tdiv_r.c $(H) $(srcdir)/../longlong.h $(srcdir)/dmincl.c $(H tdiv_r_ui.o: $(srcdir)/tdiv_r_ui.c $(H) tstbit.o: $(srcdir)/tstbit.c $(H) ui_pow_ui.o: $(srcdir)/ui_pow_ui.c $(H) $(srcdir)/../longlong.h +urandom.o: $(srcdir)/urandom.c $(H) xor.o: $(srcdir)/xor.c $(H) diff --git a/mpz/urandom.c b/mpz/urandom.c new file mode 100644 index 000000000..438b5ac40 --- /dev/null +++ b/mpz/urandom.c @@ -0,0 +1,270 @@ +/* urandomb(rop, state) -- Generate a uniform pseudorandom integer in +the range 0 to 2^n - 1, inclusive, using `state' as the random state +previously initialized by a call to gmp_rand_init(). The `n' in the +above range is passed to gmp_rand_init(). + +Copyright (C) 1999 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* We use (1) + + X = (aX + c) mod m + +[D. Knuth, "The Art of Computer Programming: Volume 2, Seminumerical Algorithms", +Third Edition, Addison Wesley, 1998, pp. 184-185.] + + X is the seed and the result + a is chosen so that + a mod 8 = 5 [3.2.1.2] and [3.2.1.3] + .01m < a < .99m + its binary or decimal digits is not a simple, regular pattern + it has no large quotients when Euclid's algorithm is used to find + gcd(a, m) [3.3.3] + it passes the spectral test [3.3.4] + it passes several tests of [3.3.2] + c has no factor in common with m (c=1 or c=a can be good) + m is large (2^30) + is a power of 2 [3.2.1.1] + +The least significant digits of the generated number are not very +random. It should be regarded as a random fraction X/m. To get a +random integer between 0 and n-1, multiply X/m by n and truncate. +(Don't use X/n [ex 3.4.1-3]) + +The ``accuracy'' in t dimensions is one part in ``the t'th root of m'' [3.3.4]. + +Don't generate more than about m/1000 numbers without changing a, c, or m. + +The sequence length depends on chosen a,c,m. + + +Or, we use (2) + X = a * (X mod q) - r * (long) (X/q) + if X<0 then X+=m + +[Knuth, pp. 185-186.] + + X is the seed and the result + as a seed is nonzero and less than m + a is a primitive root of m (which means that a^2 <= m) + q is (long) m / a + r is m mod a + m is a prime number near the largest easily computed integer + +which gives + + X = a * (X % ((long) m / a)) - + (M % a) * ((long) (X / ((long) m / a))) + +Since m is prime, the least-significant bits of X are just as random as +the most-significant bits. + +*/ + +#include "gmp.h" + +/* Array of CL-schemes, ordered in increasing order for the first + member (the 'bits' value). The 'm' entry is converted by + mpz_set_str() with BASE=0. See its documentation for syntax of + 'm'. End of array is indicated with an entry containing all + zeros. */ +static __gmp_rand_scheme_struct __gmp_rand_scheme[] = +{ + {31, /* Knuth, p. 185 */ + "48271", {0}, /* a */ + 0, /* c = 0 */ + "0x7fffffff", {0}}, /* m = 2^31 - 1 */ + + {31, /* fbsd random(3), lazy */ + "16807", {0}, /* a = 7^5 */ + 0, /* c = 0 */ + "0x7fffffff", {0}}, /* m = 2^31 - 1 */ + + {31, /* fbsd rand(3) */ + "1103515245", {0}, /* a (multiplier) */ + 12345, /* c (adder) */ + "0x80000000", {0}}, /* m (modulo) = 2^31 */ + + {32, "42949685", {0}, 1, "0x100000000", {0}}, + {33, "85899357", {0}, 1, "0x200000000", {0}}, + {34, "171798701", {0}, 1, "0x400000000", {0}}, + {35, "343597397", {0}, 1, "0x800000000", {0}}, + {36, "687194781", {0}, 1, "0x1000000000", {0}}, + {37, "1374389541", {0}, 1, "0x2000000000", {0}}, + {38, "2748779077", {0}, 1, "0x4000000000", {0}}, + {39, "5497558149", {0}, 1, "0x8000000000", {0}}, + {40, "10995116285", {0}, 1, "0x10000000000", {0}}, + + {56, "720575940379293", + {0}, 1, "0x100000000000000", {0}}, + {64, "184467440737095525", + {0}, 1, "0x10000000000000000", {0}}, + {100, "12676506002282294014967032061", + {0}, 1, "0x10000000000000000000000000", {0}}, + {128, "3402823669209384634633746074317682125", + {0}, 1, "0x100000000000000000000000000000000", {0}}, + {156, "913438523331814323877303020447676887284957853", + {0}, 1, "0x1000000000000000000000000000000000000000", {0}}, + {196, "1004336277661868922213726307713226626576376871114245522077", + {0}, 1, "0x10000000000000000000000000000000000000000000000000", {0}}, + {200, "16069380442589902755419620923411626025222029937827928353021", + {0}, 1, "0x100000000000000000000000000000000000000000000000000", {0}}, + {256, "1157920892373161954235709850086879078532699846656405640394575840079131296413", + {0}, 1, "0x10000000000000000000000000000000000000000000000000000000000000000", {0}}, + + /* {, "", {0}, 1, "0x", {0}}, */ + {0, NULL, {0}, 0, NULL, {0}} /* End of array. */ +}; + +/* gmp_rand_init() -- Initialize a gmp_rand_state struct. Return 0 on + success and 1 on failure. */ + +int +#if __STDC__ +gmp_rand_init (gmp_rand_state *s, + gmp_rand_algorithm alg, + unsigned long int size, + mpz_t seed) +#else +gmp_rand_init (s, alg, size, seed) + gmp_rand_state *s; + gmp_rand_algorithm alg; + unsigned long int size; + mpz_t seed; +#endif +{ + __gmp_rand_scheme_struct *sp; + + switch (alg) + { + case GMP_RAND_ALG_LC: /* Linear congruental. */ + /* Convert the 'a' and 'm' strings to mpz_t's. */ + for (sp = __gmp_rand_scheme; sp->bits; sp++) + { + mpz_init_set_str (sp->a, sp->astr, 0); + mpz_init_set_str (sp->m, sp->mstr, 0); + } + /* Pick a scheme. */ + s->scheme = NULL; + for (sp = __gmp_rand_scheme; sp->bits; sp++) + if (sp->bits >= size) + { + s->scheme = sp; + break; + } + if (NULL == s->scheme) /* Nothing big enough found. */ + s->scheme = --sp; /* Use biggest available. */ + + mpz_init (s->n); + mpz_tdiv_q_ui (s->n, s->scheme->m, 1000UL); + break; + case GMP_RAND_ALG_BBS: + return 1; /* Not implemented yet. */ + break; + default: /* Bad choice. */ + return 1; + } + + /* Common. */ + s->alg = alg; + s->size = size; + mpz_init_set_ui (s->maxval, 1); + mpz_mul_2exp (s->maxval, s->maxval, s->size); + mpz_init_set (s->seed, seed); + + return 0; +} + +void +#if __STDC__ +gmp_rand_clear (gmp_rand_state *s) +#else +gmp_rand_clear (s) + gmp_rand_state *s; +#endif +{ + __gmp_rand_scheme_struct *sp; + + + mpz_clear (s->seed); + mpz_clear (s->maxval); + + switch (s->alg) + { + case GMP_RAND_ALG_LC: /* Linear congruental. */ + mpz_clear (s->n); + for (sp = __gmp_rand_scheme; sp->bits; sp++) + { + mpz_clear (sp->a); + mpz_clear (sp->m); + } + break; + } +} + +void +#if __STDC__ +mpz_urandomb (mpz_t rop, gmp_rand_state *s) +#else +mpz_urandomb (rop, s) + mpz_t rop; + gmp_rand_state *s; +#endif +{ + switch (s->alg) + { + case GMP_RAND_ALG_LC: + if (mpz_cmp_ui (s->n, 0) == 0) + { + if ((s->scheme + 1)->bits) + s->scheme++; + mpz_tdiv_q_ui (s->n, s->scheme->m, 1000UL); + } + else + mpz_sub_ui (s->n, s->n, 1); + + /* rop = (seed * a + c) % m */ + mpz_mul (rop, s->seed, s->scheme->a); + mpz_add_ui (rop, rop, s->scheme->c); + mpz_mod (rop, rop, s->scheme->m); + + if (mpz_cmp (s->scheme->m, s->maxval)) + { + /* rop = (rop / m) * maxval, truncate result */ + mpf_t ft1, ft2; + + mpf_init (ft1); + mpf_init (ft2); + + mpf_set_z (ft1, rop); + mpf_set_z (ft2, s->scheme->m); + mpf_div (ft1, ft1, ft2); + mpf_set_z (ft2, s->maxval); + mpf_mul (ft1, ft1, ft2); + mpz_set_f (rop, ft1); /* Truncating. */ + + mpf_clear (ft1); + mpf_clear (ft2); + } + break; /* GMP_RAND_ALG_LC */ + } + + /* Save result for next seed. */ + mpz_set (s->seed, rop); +} diff --git a/tests/rand/ChangeLog b/tests/rand/ChangeLog new file mode 100644 index 000000000..c6611e1c8 --- /dev/null +++ b/tests/rand/ChangeLog @@ -0,0 +1,5 @@ +1999-03-15 Linus Nordberg <linus.nordberg@canit.se> + + * gen.c, stat.c, statlib.c, statlib.h: New files. + * Makefile, ChangeLog: New files. + diff --git a/tests/rand/Makefile b/tests/rand/Makefile new file mode 100644 index 000000000..56dc6c7bd --- /dev/null +++ b/tests/rand/Makefile @@ -0,0 +1,28 @@ +CC = gcc +GMPDIR = ../.. +#GMPLIBDIR = $(GMPDIR) +GMPLIBDIR = /tmp/linus/gmp +GMPLIB = $(GMPLIBDIR)/libgmp.a +GMPINC = $(GMPDIR)/gmp.h + +DEP = statlib.o $(GMPLIB) $(GMPINC) + +CFLAGS = -Wall -g -I$(GMPDIR) -L$(GMPLIBDIR) $(XCFLAGS) +LIBS = -lgmp -lm + +findcl: findcl.c $(DEP) +gen: gen.c $(DEP) +stat: stat.c $(DEP) + $(CC) $(CFLAGS) -L$(GMPLIBDIR) -L. -o $@ $< statlib.o $(LIBS) +foo: foo.c $(DEP) + $(CC) $(CFLAGS) -L$(GMPLIBDIR) -o $@ $< -lgmp + +statlib.o: statlib.c statlib.h $(GMPLIB) + +ks: ks.c $(DEP) + $(CC) $(CFLAGS) -o $@ $< statlib.o $(LIBS) + +%.o : %.c + $(CC) $(CFLAGS) -c -o $@ $< +% : %.c + $(CC) $(CFLAGS) -o $@ $< $(LIBS) diff --git a/tests/rand/gen.c b/tests/rand/gen.c new file mode 100644 index 000000000..dbd40c84a --- /dev/null +++ b/tests/rand/gen.c @@ -0,0 +1,289 @@ +/* gen.c -- Generate pseudorandom numbers. */ + +/* Examples: + + $ gen 10 +10 real numbers 0 <= X < 1 generated by mpf_urandomb() + + $ gen -f mpz_urandomb 10 +10 integers 0 <= X < 2^32 + + $ gen -f mpz_urandomb -z 127 10 +10 integers 0 <= X < 2^127 + + $ gen -x .9,1 10 +10 real numbers 0 <= X < .9 + + $ gen -s 1 10 +10 real numbers, sequence seeded with 1 + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <time.h> +#include <string.h> +#include <gmp.h> + +int main (argc, argv) + int argc; + char *argv[]; +{ + const char usage[] = + "usage: gen [-abhx] [-f func] [-s #] [-z #] n\n" \ + " n number of numbers to generate\n" \ + " -a ASCII output in radix 10 (default)\n" \ + " -b binary output\n" \ + " -f func random function, one of\n" \ + " mpz_urandomb, mpf_urandomb (default), rand, random\n" \ + " -h print this text and exit\n" \ + " -s # initial seed (default: output from time(3))\n" \ + " -z # size in bits of generated integer numbers (0<= X <2^#) \n" \ + " (default 32)\n" \ + " -x f,t exclude all numbers f <= x <= t\n" \ + ""; + + unsigned long int f; + unsigned long int n; + unsigned long int seed; + int seed_from_user = 0; + unsigned int size = 32; + mpz_t z1; + mpf_t f1; + gmp_rand_state s; + int c, i; + int ascout = 1, binout = 0; + double drand; + long lrand; + int do_exclude = 0; + mpf_t f_xf, f_xt; /* numbers to exclude from sequence */ + char *str_xf, *str_xt; /* numbers to exclude from sequence */ + enum + { + RFUNC_mpz_urandomb = 0, + RFUNC_mpf_urandomb, + RFUNC_rand, + RFUNC_random, + } rfunc = RFUNC_mpf_urandomb; + char *rfunc_str[] = { "mpz_urandomb", "mpf_urandomb", "rand", "random" }; + gmp_rand_algorithm rand_alg = GMP_RAND_ALG_DEFAULT; + + mpf_init (f_xf); mpf_init (f_xt); + + while ((c = getopt (argc, argv, "abf:hn:s:z:x:")) != -1) + switch (c) + { + case 'a': + ascout = 1; + binout = 0; + break; + + case 'b': + ascout = 0; + binout = 1; + break; + + case 'f': + rfunc = -1; + for (f = 0; f < sizeof (rfunc_str) / sizeof (*rfunc_str); f++) + if (!strcmp (optarg, rfunc_str[f])) + rfunc = f; + if (rfunc == -1) + { + fputs (usage, stderr); + exit (1); + } + break; + + case 's': + seed = strtoul (optarg, NULL, 10); + seed_from_user = 1; + break; + + case 'z': + size = atoi (optarg); + if (size < 1) + { + fprintf (stderr, "gen: bad size argument (-z %u)\n", size); + exit (1); + } + break; + + case 'x': /* exclude */ + str_xf = optarg; + str_xt = strchr (optarg, ','); + if (NULL == str_xt) + { + fprintf (stderr, "gen: bad exclusion parameters: %s\n", optarg); + exit (1); + } + *str_xt++ = '\0'; + do_exclude = 1; + break; + + case 'h': + case '?': + default: + fputs (usage, stderr); + exit (1); + } + argc -= optind; + argv += optind; + + if (argc < 1) + { + fputs (usage, stderr); + exit (1); + } + + mpz_init (z1); + mpf_init (f1); + + if (!seed_from_user) + seed = (unsigned long int) time (NULL); + + /* set seed */ + switch (rfunc) + { + case RFUNC_mpz_urandomb: + case RFUNC_mpf_urandomb: + mpz_set_ui (z1, seed); + gmp_rand_init (&s, rand_alg, size, z1); + break; + + case RFUNC_rand: + srand (seed); + break; + + case RFUNC_random: + /* srandom (seed); */ + srandomdev (); + break; + + default: + fprintf (stderr, "gen: random function not implemented\n"); + exit (1); + } + + /* set up excludes */ + if (do_exclude) + switch (rfunc) + { + case RFUNC_mpf_urandomb: + + if (mpf_set_str (f_xf, str_xf, 10) || + mpf_set_str (f_xt, str_xt, 10)) + { + fprintf (stderr, "gen: bad exclusion-from (\"%s\") " \ + "or exclusion-to (\"%s\") string. no exclusion done.\n", + str_xf, str_xt); + do_exclude = 0; + } + break; + + default: + fprintf (stderr, "gen: exclusion not implemented for chosen " \ + "randomization function. all numbers included in sequence.\n"); + } + + /* generate and print */ + n = strtoul (argv[0], (char **) NULL, 10); + for (f = 0; f < n; f++) + { + switch (rfunc) + { + case RFUNC_mpz_urandomb: + mpz_urandomb (z1, &s); + if (binout) + { + /*fwrite ((unsigned int *) z1->_mp_d, 4, 1, stdout);*/ + fprintf (stderr, "gen: binary output for mpz_urandomb is broken\n"); + exit (1); + } + else + { + mpz_out_str (stdout, 10, z1); + puts (""); + } + + break; + + case RFUNC_mpf_urandomb: + mpf_urandomb (f1, &s); + if (do_exclude) + if (mpf_cmp (f1, f_xf) >= 0 && mpf_cmp (f1, f_xt) <= 0) + break; + if (binout) + { + fprintf (stderr, "gen: binary output for floating point numbers "\ + "not implemented\n"); + exit (1); + } + else + { + mpf_out_str (stdout, 10, 0, f1); + puts (""); + } + break; + + case RFUNC_rand: + i = rand (); +#ifdef FLOAT_OUTPUT + if (i) + drand = (double) i / (double) RAND_MAX; + else + drand = 0.0; + if (binout) + fwrite (&drand, sizeof (drand), 1, stdout); + else + printf ("%e\n", drand); +#else + if (binout) + fwrite (&i, sizeof (i), 1, stdout); + else + printf ("%d\n", i); +#endif + break; + + case RFUNC_random: + lrand = random (); + if (lrand) + drand = (double) lrand / (double) 0x7fffffff; + else + drand = 0; + if (binout) + fwrite (&drand, sizeof (drand), 1, stdout); + else + printf ("%e\n", drand); + break; + + default: + fprintf (stderr, "gen: random function not implemented\n"); + exit (1); + } + + } + + /* clean up */ + switch (rfunc) + { + case RFUNC_mpz_urandomb: + case RFUNC_mpf_urandomb: + gmp_rand_clear (&s); + break; + default: + break; + } + mpz_clear (z1); + mpf_clear (f1); + mpf_clear (f_xf); mpf_clear (f_xt); + + return 0; +} + + + + + diff --git a/tests/rand/stat.c b/tests/rand/stat.c new file mode 100644 index 000000000..40459ddec --- /dev/null +++ b/tests/rand/stat.c @@ -0,0 +1,377 @@ +/* stat.c -- statistical tests of random number sequences. */ + +/* Examples: + + $ gen 1000 | stat +Test 1000 real numbers. + + $ gen 30000 | stat -2 1000 +Test 1000 real numbers 30 times and then test the 30 results in a +``second level''. + + $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff +Test 1000 integers 0 <= X <= 2^32-1. + + $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff +Test 1000 integers 0 <= X <= 2^34-1. + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <math.h> +#include <gmp.h> +#include "statlib.h" + +#define FVECSIZ (100000L) + +static void +print_ks_results (mpf_t f_p, mpf_t f_p_prob, + mpf_t f_m, mpf_t f_m_prob, + FILE *fp) +{ + double p, pp, m, mp; + + p = mpf_get_d (f_p); + m = mpf_get_d (f_m); + pp = mpf_get_d (f_p_prob); + mp = mpf_get_d (f_m_prob); + + fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0); + fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0); +} + +static void +print_x2_table (unsigned int v, FILE *fp) +{ + double t[7]; + int f; + + + fprintf (fp, "Chi-square table for v=%u\n", v); + fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n"); + x2_table (t, v); + for (f = 0; f < 7; f++) + fprintf (fp, "%.2f\t", t[f]); + fputs ("\n", fp); +} + + + +/* Pks () -- Distribution function for KS results with a big n (like 1000 + or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */ +/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */ + +static void +Pks (mpf_t p, mpf_t x) +{ + double dt; /* temp double */ + + mpf_set (p, x); + mpf_mul (p, p, p); /* p = x^2 */ + mpf_mul_ui (p, p, 2); /* p = 2*x^2 */ + mpf_neg (p, p); /* p = -2*x^2 */ + /* No pow() in gmp. Use doubles. */ + /* FIXME: Use exp()? */ + dt = pow (M_E, mpf_get_d (p)); + mpf_set_d (p, dt); + mpf_ui_sub (p, 1, p); +} + +/* f_freq() -- frequency test on real numbers 0<=f<1*/ +static void +f_freq (const unsigned l1runs, const unsigned l2runs, + mpf_t fvec[], const unsigned long n) +{ + unsigned f; + mpf_t f_p, f_p_prob; + mpf_t f_m, f_m_prob; + mpf_t *l1res; /* level 1 result array */ + + mpf_init (f_p); mpf_init (f_m); + mpf_init (f_p_prob); mpf_init (f_m_prob); + + + /* Allocate space for 1st level results. */ + l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t)); + if (NULL == l1res) + { + fprintf (stderr, "stat: malloc failure\n"); + exit (1); + } + + printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n"); + printf ("\tKp\t\tKm\n"); + + for (f = 0; f < l2runs; f++) + { + printf ("%u:\t", f + 1); + /* f_printvec (fvec, n); */ + mpf_freqt (f_p, f_m, fvec + f * n, n); + + /* save result */ + mpf_init_set (l1res[f], f_p); + mpf_init_set (l1res[f + l2runs], f_m); + + /* what's the probability of getting these results? */ + ks_table (f_p_prob, f_p, n); + ks_table (f_m_prob, f_m, n); + + print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); + } + + /* Now, apply the KS test on the results from the 1st level rounds + with the distribution + F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */ + + printf ("-------------------------------------\n"); + + /* The Kp's. */ + ks (f_p, f_m, l1res, Pks, l2runs); + ks_table (f_p_prob, f_p, l2runs); + ks_table (f_m_prob, f_m, l2runs); + printf ("Kp:\t"); + print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); + + /* The Km's. */ + ks (f_p, f_m, l1res + l2runs, Pks, l2runs); + ks_table (f_p_prob, f_p, l2runs); + ks_table (f_m_prob, f_m, l2runs); + printf ("Km:\t"); + print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); + + mpf_clear (f_p); mpf_clear (f_m); + mpf_clear (f_p_prob); mpf_clear (f_m_prob); + free (l1res); +} + +/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers + 0<=z<=MAX */ +static void +z_freq (const unsigned l1runs, + const unsigned l2runs, + mpz_t zvec[], + const unsigned long n, + unsigned int max) +{ + mpf_t V; /* result */ + double d_V; /* result as a double */ + + mpf_init (V); + + + printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max); + print_x2_table (max, stdout); + + mpz_freqt (V, zvec, max, n); + + d_V = mpf_get_d (V); + printf ("V = %.2f (n = %lu)\n", d_V, n); + + mpf_clear (V); +} + +unsigned int stat_debug = 0; + +int +main (argc, argv) + int argc; + char *argv[]; +{ + const char usage[] = + "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \ + " file filename\n" \ + " -2 runs perform 2-level test with RUNS runs on 1st level\n" \ + " -d increase debugging level\n" \ + " -i max input is integers 0 <= Z <= MAX\n" \ + " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \ + " maximum value when converting real numbers to integers\n" \ + ""; + + mpf_t fvec[FVECSIZ]; + mpz_t zvec[FVECSIZ]; + unsigned long int f, n, vecentries; + char *filen; + FILE *fp; + int c; + int omitoutput = 0; + int realinput = -1; /* 1: input is real numbers 0<=R<1; + 0: input is integers 0 <= Z <= MAX. */ + long l1runs, /* 1st level runs */ + l2runs = 1; /* 2nd level runs */ + mpf_t f_temp; + mpz_t z_imax; /* max value when converting between + real number and integer. */ + mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for + convenience */ + mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for + convenience */ + + + mpf_init (f_temp); + mpz_init_set_ui (z_imax, 0x7fffffff); + mpf_init (f_imax_plus1); + mpf_init (f_imax_minus1); + + while ((c = getopt (argc, argv, "d2:i:r:")) != -1) + switch (c) + { + case '2': + l1runs = atol (optarg); + l2runs = -1; /* set later on */ + break; + case 'd': /* increase debug level */ + stat_debug++; + break; + case 'i': + if (1 == realinput) + { + fputs ("stat: options -i and -r are mutually exclusive\n", stderr); + exit (1); + } + if (mpz_set_str (z_imax, optarg, 0)) + { + fprintf (stderr, "stat: bad max value %s\n", optarg); + exit (1); + } + realinput = 0; + break; + case 'r': + if (0 == realinput) + { + fputs ("stat: options -i and -r are mutually exclusive\n", stderr); + exit (1); + } + if (mpz_set_str (z_imax, optarg, 0)) + { + fprintf (stderr, "stat: bad max value %s\n", optarg); + exit (1); + } + realinput = 1; + break; + case 'o': + omitoutput = atoi (optarg); + break; + case '?': + default: + fputs (usage, stderr); + exit (1); + } + argc -= optind; + argv += optind; + + if (argc < 1) + fp = stdin; + else + filen = argv[0]; + + if (fp != stdin) + if (NULL == (fp = fopen (filen, "r"))) + { + perror (filen); + exit (1); + } + + if (-1 == realinput) + realinput = 1; /* default is real numbers */ + + /* read file and fill appropriate vec */ + if (1 == realinput) /* real input */ + { + for (f = 0; f < FVECSIZ ; f++) + { + mpf_init (fvec[f]); + if (!mpf_inp_str (fvec[f], fp, 10)) + break; + } + } + else /* integer input */ + { + for (f = 0; f < FVECSIZ ; f++) + { + mpz_init (zvec[f]); + if (!mpz_inp_str (zvec[f], fp, 10)) + break; + } + } + vecentries = n = f; /* number of entries read */ + fclose (fp); + + if (FVECSIZ == f) + fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\ + "of only %ld entries. sorry.\n", FVECSIZ); + + printf ("Got %lu numbers.\n", n); + + /* convert and fill the other vec */ + /* since fvec[] contains 0<=f<1 and we want ivec[] to contain + 0<=z<=imax and we are truncating all fractions when + converting float to int, we have to add 1 to imax.*/ + mpf_set_z (f_imax_plus1, z_imax); + mpf_add_ui (f_imax_plus1, f_imax_plus1, 1); + if (1 == realinput) /* fill zvec[] */ + { + for (f = 0; f < n; f++) + { + mpf_mul (f_temp, fvec[f], f_imax_plus1); + mpz_init (zvec[f]); + mpz_set_f (zvec[f], f_temp); /* truncating fraction */ + if (stat_debug > 1) + { + mpz_out_str (stderr, 10, zvec[f]); + fputs ("\n", stderr); + } + } + } + else /* integer input; fill fvec[] */ + { + /* mpf_set_z (f_imax_minus1, z_imax); + mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/ + for (f = 0; f < n; f++) + { + mpf_init (fvec[f]); + mpf_set_z (fvec[f], zvec[f]); + mpf_div (fvec[f], fvec[f], f_imax_plus1); + if (stat_debug > 1) + { + mpf_out_str (stderr, 10, 0, fvec[f]); + fputs ("\n", stderr); + } + } + } + + /* 2 levels? */ + if (1 != l2runs) + { + l2runs = n / l1runs; + printf ("Doing %ld second level rounds "\ + "with %ld entries in each round", l2runs, l1runs); + if (n % l1runs) + printf (" (discarding %ld entr%s)", n % l1runs, + n % l1runs == 1 ? "y" : "ies"); + puts (""); + n = l1runs; + } + + f_freq (l1runs, l2runs, fvec, n); +#ifdef DO_ZFREQ + z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax)); +#endif + + mpf_clear (f_temp); mpz_clear (z_imax); + mpf_clear (f_imax_plus1); + mpf_clear (f_imax_minus1); + for (f = 0; f < vecentries; f++) + { + mpf_clear (fvec[f]); + mpz_clear (zvec[f]); + } + + return 0; +} + + + + + diff --git a/tests/rand/statlib.c b/tests/rand/statlib.c new file mode 100644 index 000000000..b8cc255e8 --- /dev/null +++ b/tests/rand/statlib.c @@ -0,0 +1,338 @@ +/* statlib.c -- Statistical functions. Good for testing the + randomness of number sequences. */ + +/* The theories for these functions are taken from D. Knuth's "The Art +of Computer Programming: Volume 2, Seminumerical Algorithms", Third +Edition, Addison Wesley, 1998. */ + +/* Implementation notes. + +The Kolmogorov-Smirnov test. + +Eq. (13) in Knuth, p. 50, says that if X1, X2, ..., Xn are independent +observations arranged into ascending order + + Kp = sqr(n) * max(j/n - F(Xj)) for all 1<=j<=n + Km = sqr(n) * max(F(Xj) - (j-1)/n)) for all 1<=j<=n + +where F(x) = Pr(X <= x) = probability that (X <= x), which for a +uniformly distributed random real number between zero and one is +exactly the number itself (x). + + +The answer to exercise 23 gives the following implementation, which +doesn't need the observations to be sorted in ascending order: + +for (k = 0; k < m; k++) + a[k] = 1.0 + b[k] = 0.0 + c[k] = 0 + +for (each observation Xj) + Y = F(Xj) + k = floor (m * Y) + a[k] = min (a[k], Y) + b[k] = max (b[k], Y) + c[k] += 1 + + j = 0 + rp = rm = 0 + for (k = 0; k < m; k++) + if (c[k] > 0) + rm = max (rm, a[k] - j/n) + j += c[k] + rp = max (rp, j/n - b[k]) + +Kp = sqr (n) * rp +Km = sqr (n) * rm + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <gmp.h> + +extern unsigned int stat_debug; + +/* ks (Kp, Km, X, P, n) -- Perform a Kolmogorov-Smirnov test on the N + real numbers between zero and one in vector X. P is the + distribution function, called for each entry in X, which should + calculate the probability of X being greater than or equal to any + number in the sequence. (For a uniformly distributed sequence of + real numbers between zero and one, this is simply equal to X.) The + result is put in Kp and Km. */ + +void +ks (mpf_t Kp, + mpf_t Km, + mpf_t X[], + void (P) (mpf_t, mpf_t), + unsigned long int n) +{ + mpf_t Kt; /* temp */ + mpf_t f_x; + mpf_t f_j; /* j */ + mpf_t f_jnq; /* j/n or (j-1)/n */ + unsigned long int j; + + /* Sort the vector in ascending order. */ + qsort (X, n, sizeof (__mpf_struct), mpf_cmp); + + /* K-S test. */ + /* Kp = sqr(n) * max(j/n - F(Xj)) for all 1<=j<=n + Km = sqr(n) * max(F(Xj) - (j-1)/n)) for all 1<=j<=n + */ + + mpf_init (Kt); mpf_init (f_x); mpf_init (f_j); mpf_init (f_jnq); + mpf_set_ui (Kp, 0); mpf_set_ui (Km, 0); + for (j = 1; j <= n; j++) + { + P (f_x, X[j-1]); + mpf_set_ui (f_j, j); + + mpf_div_ui (f_jnq, f_j, n); + mpf_sub (Kt, f_jnq, f_x); + if (mpf_cmp (Kt, Kp) > 0) + mpf_set (Kp, Kt); + if (stat_debug > 2) + { + printf ("j=%lu ", j); + printf ("P()="); mpf_out_str (stdout, 10, 2, f_x); printf ("\t"); + + printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" "); + printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" "); + printf ("Kp="); mpf_out_str (stdout, 10, 2, Kp); printf ("\t"); + } + mpf_sub_ui (f_j, f_j, 1); + mpf_div_ui (f_jnq, f_j, n); + mpf_sub (Kt, f_x, f_jnq); + if (mpf_cmp (Kt, Km) > 0) + mpf_set (Km, Kt); + + if (stat_debug > 2) + { + printf ("jnq="); mpf_out_str (stdout, 10, 2, f_jnq); printf (" "); + printf ("diff="); mpf_out_str (stdout, 10, 2, Kt); printf (" "); + printf ("Km="); mpf_out_str (stdout, 10, 2, Km); printf (" "); + printf ("\n"); + } + } + mpf_sqrt_ui (Kt, n); + mpf_mul (Kp, Kp, Kt); + mpf_mul (Km, Km, Kt); + + mpf_clear (Kt); mpf_clear (f_x); mpf_clear (f_j); mpf_clear (f_jnq); +} + +/* ks_table(val, n) -- calculate probability for Kp/Km less than or + equal to VAL with N observations. See [Knuth section 3.3.1] */ + +void +ks_table (mpf_t p, mpf_t val, const unsigned int n) +{ + /* We use Eq. (27), Knuth p.58, skipping O(1/n) for simplicity. + This shortcut will result in too high probabilities, especially + when n is small. + + Pr(Kp(n) <= s) = 1 - pow(e, -2*s^2) * (1 - 2/3*s/sqrt(n) + O(1/n)) */ + + mpf_t t1, t2; + + mpf_init (t1); mpf_init (t2); + + /* t1 = 1 - 2/3 * s/sqrt(n) */ + mpf_sqrt_ui (t1, n); + mpf_div (t1, val, t1); + mpf_mul_ui (t1, t1, 2); + mpf_div_ui (t1, t1, 3); + mpf_ui_sub (t1, 1, t1); + + /* t2 = pow(e, -2*s^2) */ + /* hmmm, gmp doesn't have pow() for floats. use doubles. */ + mpf_set_d (t2, pow (M_E, -(2 * pow (mpf_get_d (val), 2)))); + + /* p = 1 - t1 * t2 */ + mpf_mul (t1, t1, t2); + mpf_ui_sub (p, 1, t1); + + mpf_clear (t1); mpf_clear (t2); +} + +static double x2_table_X[][7] = { + { -2.33, -1.64, -.674, 0.0, 0.674, 1.64, 2.33 }, /* x */ + { 5.4289, 2.6896, .454276, 0.0, .454276, 2.6896, 5.4289} /* x^2 */ +}; + +#define _2D3 ((double) .6666666666) + +/* x2_table (t, v, n) -- return chi-square table row for V in T[]. */ +void +x2_table (double t[], + unsigned int v) +{ + int f; + + + /* FIXME: Do a table lookup for v <= 30 since the following formula + [Knuth, vol 2, 3.3.1] is only good for v > 30. */ + + /* value = v + sqrt(2*v) * X[p] + (2/3) * X[p]^2 - 2/3 + O(1/sqrt(t) */ + /* NOTE: The O() term is ignored for simplicity. */ + + for (f = 0; f < 7; f++) + t[f] = + v + + sqrt (2 * v) * x2_table_X[0][f] + + _2D3 * x2_table_X[1][f] - _2D3; +} + + +/* P(p, x) -- Distribution function. Calculate the probability of X +being greater than or equal to any number in the sequence. For a +random real number between zero and one given by a uniformly +distributed random number generator, this is simply equal to X. */ + +static void +P (mpf_t p, mpf_t x) +{ + mpf_set (p, x); +} + +/* mpf_freqt() -- Frequency test using KS on N real numbers between zero + and one. See [Knuth vol 2, p.61]. */ +void +mpf_freqt (mpf_t Kp, + mpf_t Km, + mpf_t X[], + const unsigned long int n) +{ + ks (Kp, Km, X, P, n); +} + + +/* The Chi-square test. Eq. (8) in Knuth vol. 2 says that if Y[] + holds the observations and p[] is the probability for.. (to be + continued!) + + V = 1/n * sum((s=1 to k) Y[s]^2 / p[s]) - n */ + +void +x2 (mpf_t V, /* result */ + unsigned long int X[], /* data */ + unsigned int k, /* #of categories */ + void (P) (mpf_t, unsigned long int, void *), /* probability func */ + void *x, /* extra user data passed to P() */ + unsigned long int n) /* #of samples */ +{ + unsigned int f; + mpf_t f_t, f_t2; /* temp floats */ + + mpf_init (f_t); mpf_init (f_t2); + + + mpf_set_ui (V, 0); + for (f = 0; f < k; f++) + { + if (stat_debug > 1) + fprintf (stderr, "%u: P()=", f); + mpf_set_ui (f_t, X[f]); + mpf_mul (f_t, f_t, f_t); /* f_t = X[f]^2 */ + P (f_t2, f, x); /* f_t2 = Pr(f) */ + if (stat_debug > 1) + mpf_out_str (stderr, 10, 2, f_t2); + mpf_div (f_t, f_t, f_t2); + mpf_add (V, V, f_t); + if (stat_debug > 1) + { + fprintf (stderr, "\tV="); + mpf_out_str (stderr, 10, 2, V); + fprintf (stderr, "\t"); + } + } + if (stat_debug > 1) + fprintf (stderr, "\n"); + mpf_div_ui (V, V, n); + mpf_sub_ui (V, V, n); + + mpf_clear (f_t); mpf_clear (f_t2); +} + +/* Pzf(p, s, x) -- Probability for category S in mpz_freqt(). It's + 1/d for all S. X is a pointer to an unsigned int holding 'd'. */ +static void +Pzf (mpf_t p, unsigned long int s, void *x) +{ + mpf_set_ui (p, 1); + mpf_div_ui (p, p, *((unsigned int *) x)); +} + +/* mpz_freqt(V, X, imax, n) -- Frequency test on integers. [Knuth, + vol 2, 3.3.2]. Keep IMAX low on this one, since we loop from 0 to + IMAX. 128 or 256 could be nice. + + X[] must not contain numbers outside the range 0 <= X <= IMAX. + + Return value is number of observations actally used, after + discarding entries out of range. + + Since X[] contains integers between zero and IMAX, inclusive, we + have IMAX+1 categories. + + Note that N should be at least 5*IMAX. Result is put in V and can + be compared to output from x2_table (v=IMAX). */ + +unsigned long int +mpz_freqt (mpf_t V, + mpz_t X[], + unsigned int imax, + const unsigned long int n) +{ + unsigned long int *v; /* result */ + unsigned int f; + unsigned int d; /* number of categories = imax+1 */ + unsigned int uitemp; + unsigned long int usedn; + + + d = imax + 1; + + v = (unsigned long int *) calloc (imax + 1, sizeof (unsigned long int)); + if (NULL == v) + { + fprintf (stderr, "mpz_freqt(): out of memory\n"); + exit (1); + } + + /* count */ + usedn = n; /* actual number of observations */ + for (f = 0; f < n; f++) + { + uitemp = mpz_get_ui(X[f]); + if (uitemp > imax) /* sanity check */ + { + if (stat_debug) + fprintf (stderr, "mpz_freqt(): warning: input insanity: %u, "\ + "ignored.\n", uitemp); + usedn--; + continue; + } + v[uitemp]++; + } + + if (stat_debug > 1) + { + fprintf (stderr, "counts:\n"); + for (f = 0; f <= imax; f++) + fprintf (stderr, "%u:\t%lu\n", f, v[f]); + } + + /* chi-square with k=imax+1 and P(x)=1/(imax+1) for all x.*/ + x2 (V, v, d, Pzf, (void *) &d, usedn); + + free (v); + return (usedn); +} + + + diff --git a/tests/rand/statlib.h b/tests/rand/statlib.h new file mode 100644 index 000000000..181956666 --- /dev/null +++ b/tests/rand/statlib.h @@ -0,0 +1,31 @@ +/* statlib.h */ + +/* This file requires the following header files: gmp.h */ + +void +mpf_freqt (mpf_t Kp, + mpf_t Km, + mpf_t X[], + const unsigned long int n); +unsigned long int +mpz_freqt (mpf_t V, + mpz_t X[], + unsigned int imax, + const unsigned long int n); + +/* Low level functions. */ +void +ks (mpf_t Kp, + mpf_t Km, + mpf_t X[], + void (P) (mpf_t, mpf_t), + const unsigned long int n); + +void +ks_table (mpf_t p, mpf_t val, const unsigned int n); + +void +x2_table (double t[], + unsigned int v); + +/* eof statlib.h */ |