diff options
author | Kevin Ryde <user42@zip.com.au> | 2000-04-23 17:43:46 +0200 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2000-04-23 17:43:46 +0200 |
commit | 9e98d1cef57714e9c0c62f367761c46619ccd61d (patch) | |
tree | 8ea6861b488f4e8054db37e4fa4eed79a98513ec /tune/common.c | |
parent | a6d24214409330af111bf4ef97c6d4f1f903dde9 (diff) | |
download | gmp-9e98d1cef57714e9c0c62f367761c46619ccd61d.tar.gz |
Various small mods.
Make mpn logops available to measure.
Diffstat (limited to 'tune/common.c')
-rw-r--r-- | tune/common.c | 64 |
1 files changed, 36 insertions, 28 deletions
diff --git a/tune/common.c b/tune/common.c index 6c0147909..b1ac37148 100644 --- a/tune/common.c +++ b/tune/common.c @@ -28,7 +28,9 @@ MA 02111-1307, USA. #include <stdlib.h> #include <string.h> #include <unistd.h> +#if 0 #include <sys/ioctl.h> +#endif #include "gmp.h" #include "gmp-impl.h" @@ -204,16 +206,16 @@ mpn_cache_fill_write (mp_ptr ptr, mp_size_t size) { mpn_cache_fill (ptr, size); - /* +#if 0 mpn_random (ptr, size); - */ +#endif - /* +#if 0 mp_size_t i; for (i = 0; i < size; i++) ptr[i] = i; - */ +#endif } @@ -280,9 +282,8 @@ speed_tmp_alloc_adjust (void *ptr, mp_size_t align) speed_starttime() and speed_endtime() are put tight around the code to be measured. Any setups are done outside the timed portion. - speed_measure() does an mpn_cache_fill() on the two source operands, but - a routine is free to prime the CPU cache however it wants. - + Each routine is responsible for its own cache priming. + speed_cache_fill() is a good way to do this, see examples in speed.h. One cache priming possibility, for CPUs with write-allocate cache, and functions that don't take too long, is to do one dummy call before timing so as to cache everything that gets used. But speed_measure() runs a @@ -290,9 +291,9 @@ speed_tmp_alloc_adjust (void *ptr, mp_size_t align) be necessary. Data alignment will be important, for source, destination and temporary - workspace. A routine here can align its destination and workspace. - Programs using these routines should ensure s->xp and s->yp are aligned. - Aligning onto a CACHE_LINE_SIZE boundary is suggested. + workspace. A routine can align its destination and workspace. Programs + using the routines will ensure s->xp and s->yp are aligned. Aligning + onto a CACHE_LINE_SIZE boundary is suggested. The effects of cache priming and data alignment are particularly noticable on fast routines. Ensure these are consistent between routines @@ -312,12 +313,9 @@ speed_tmp_alloc_adjust (void *ptr, mp_size_t align) future. Routines should ignore anything they don't use. s->size can be used creatively, and s->xp and s->yp can be ignored. For - example, speed_mpz_fac_ui() uses s->size as n for the factorial, and - speed_jacobi_base() uses s->size as a size in bits. - - s->r is just a user-supplied parameter. speed_mpn_lshift() uses it as a - shift, speed_mpn_mul_1() uses it as a multiplier, speed_mpn_mul_n_toom() - uses it as a split etc. + example, speed_mpz_fac_ui() uses s->size as n for the factorial. s->r is + just a user-supplied parameter. speed_mpn_lshift() uses it as a shift, + speed_mpn_mul_1() uses it as a multiplier. */ @@ -348,17 +346,26 @@ SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_sub_n, mpn_sub_n, 1) SPEED_ROUTINE_MPN_BINARY_N_SELF (speed_mpn_add_n_self, mpn_add_n, 1) SPEED_ROUTINE_MPN_BINARY_N_INPLACE (speed_mpn_add_n_inplace, mpn_add_n, 1) -SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_and_n, 0, - mpn_and_n (wp,s->xp,s->yp,s->size)) -#if 0 -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_andn_n, mpn_andn_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_nand_n, mpn_nand_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_ior_n, mpn_ior_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_iorn_n, mpn_iorn_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_nior_n, mpn_nior_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_xor_n, mpn_xor_n, 1) -SPEED_ROUTINE_MPN_BINARY_N (speed_mpn_xnor_n, mpn_xnor_n, 1) -#endif +/* mpn_and_n etc can be macros and so have to be handled with + SPEED_ROUTINE_MPN_BINARY_N_CALL forms */ +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_and_n, 0, + mpn_and_n (wp,s->xp,s->yp,s->size)) +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_andn_n, 0, + mpn_andn_n (wp,s->xp,s->yp,s->size)) +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_nand_n, 0, + mpn_nand_n (wp,s->xp,s->yp,s->size)) + +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_ior_n, 0, + mpn_ior_n (wp,s->xp,s->yp,s->size)) +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_iorn_n, 0, + mpn_iorn_n (wp,s->xp,s->yp,s->size)) +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_nior_n, 0, + mpn_nior_n (wp,s->xp,s->yp,s->size)) + +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_xor_n, 0, + mpn_xor_n (wp,s->xp,s->yp,s->size)) +SPEED_ROUTINE_MPN_BINARY_N_CALL (speed_mpn_xnor_n, 0, + mpn_xnor_n (wp,s->xp,s->yp,s->size)) SPEED_ROUTINE_MPZ_UI (speed_mpz_fac_ui, mpz_fac_ui) SPEED_ROUTINE_MPZ_UI (speed_mpz_fib_ui, mpz_fib_ui) @@ -370,10 +377,11 @@ SPEED_ROUTINE_MPN_SQR (speed_mpn_sqr_n, mpn_sqr_n, 1) SPEED_ROUTINE_MPN_SQR_CALL (speed_mpn_mul_n_sqr, 1, mpn_mul_n(wp,s->xp,s->xp,s->size), 1) -/* FIXME: The generic basecase code has size restrictions. */ +/* FIXME: size restrictions on some sqr_basecase versions */ SPEED_ROUTINE_MPN_MUL_BASECASE(speed_mpn_mul_basecase, mpn_mul_basecase, 1) SPEED_ROUTINE_MPN_SQR (speed_mpn_sqr_basecase, mpn_sqr_basecase, 1) +/* FIXME: size restrictions on kara */ SPEED_ROUTINE_MPN_MUL_N_CALL (speed_mpn_kara_mul_n, 1, mpn_kara_mul_n(wp,s->xp,s->xp,s->size,tspace), MPN_KARA_MUL_N_TSIZE (s->size)) |