diff options
author | Kevin Ryde <user42@zip.com.au> | 2000-09-05 23:39:48 +0200 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2000-09-05 23:39:48 +0200 |
commit | e01cffd82ff7869b1e4365d9d747f226cec7bf98 (patch) | |
tree | 7c2ca5100e9415884375d701c9cc24807f201708 /tune/modlinv.c | |
parent | 32da6f015b1a607de104364c8a7ddfc6997438aa (diff) | |
download | gmp-e01cffd82ff7869b1e4365d9d747f226cec7bf98.tar.gz |
* tune/modlinv.c: New file with alternate modlimb_inverts.
Diffstat (limited to 'tune/modlinv.c')
-rw-r--r-- | tune/modlinv.c | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/tune/modlinv.c b/tune/modlinv.c new file mode 100644 index 000000000..da701c76d --- /dev/null +++ b/tune/modlinv.c @@ -0,0 +1,170 @@ +/* Alternate implementations of modlimb_invert to compare speeds. */ + +/* +Copyright (C) 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. +*/ + +#include <stdio.h> +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" +#include "speed.h" + + +/* Like the standard version in gmp-impl.h, but with the expressions using a + "1-" form. This has the same number of steps, but "1-" is on the + dependent chain, whereas the "2*" in the standard version isn't. + Depending on the CPU this should be the same or a touch slower. */ + +#if BITS_PER_MP_LIMB <= 32 +#define modlimb_invert_mul1(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __inv; \ + ASSERT ((__n & 1) == 1); \ + __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \ + __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \ + __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) +#endif + +#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64 +#define modlimb_invert_mul1(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __inv; \ + ASSERT ((__n & 1) == 1); \ + __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \ + __inv = (1 - __n * __inv) * __inv + __inv; /* 16 */ \ + __inv = (1 - __n * __inv) * __inv + __inv; /* 32 */ \ + __inv = (1 - __n * __inv) * __inv + __inv; /* 64 */ \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) +#endif + + +/* The loop based version used in GMP 3.0 and earlier. Usually slower than + multiplying, due to the number of steps that must be performed. Much + slower when the processor has a good multiply. */ + +#define modlimb_invert_loop(inv,n) \ + do { \ + mp_limb_t __v = (n); \ + mp_limb_t __v_orig = __v; \ + mp_limb_t __make_zero = 1; \ + mp_limb_t __two_i = 1; \ + mp_limb_t __v_inv = 0; \ + \ + ASSERT ((__n & 1) == 1); \ + \ + do \ + { \ + while ((__two_i & __make_zero) == 0) \ + __two_i <<= 1, __v <<= 1; \ + __v_inv += __two_i; \ + __make_zero -= __v; \ + } \ + while (__make_zero); \ + \ + ASSERT (__v_orig * __v_inv == 1); \ + (inv) = __v_inv; \ + } while (0) + + +/* Another loop based version with conditionals, but doing a fixed number of + steps. */ + +#define modlimb_invert_cond(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __rem = (1 - __n) >> 1; \ + mp_limb_t __inv = MP_LIMB_T_HIGHBIT; \ + int __count; \ + \ + ASSERT ((__n & 1) == 1); \ + \ + __count = BITS_PER_MP_LIMB-1; \ + do \ + { \ + __inv >>= 1; \ + if (__rem & 1) \ + { \ + __inv |= MP_LIMB_T_HIGHBIT; \ + __rem -= __n; \ + } \ + __rem >>= 1; \ + } \ + while (-- __count); \ + \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) + + +/* Another loop based bitwise version, but purely arithmetic, no + conditionals. */ + +#define modlimb_invert_arith(inv,n) \ + do { \ + mp_limb_t __n = (n); \ + mp_limb_t __rem = (1 - __n) >> 1; \ + mp_limb_t __inv = MP_LIMB_T_HIGHBIT; \ + mp_limb_t __lowbit; \ + int __count; \ + \ + ASSERT ((__n & 1) == 1); \ + \ + __count = BITS_PER_MP_LIMB-1; \ + do \ + { \ + __lowbit = __rem & 1; \ + __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1)); \ + __rem = (__rem - (__n & -__lowbit)) >> 1; \ + } \ + while (-- __count); \ + \ + ASSERT (__inv * __n == 1); \ + (inv) = __inv; \ + } while (0) + + +double +speed_modlimb_invert_mul1 (struct speed_params *s) +{ + SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_mul1); +} +double +speed_modlimb_invert_loop (struct speed_params *s) +{ + SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_loop); +} +double +speed_modlimb_invert_cond (struct speed_params *s) +{ + SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_cond); +} +double +speed_modlimb_invert_arith (struct speed_params *s) +{ + SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_arith); +} |