summaryrefslogtreecommitdiff
path: root/tune/modlinv.c
diff options
context:
space:
mode:
authorKevin Ryde <user42@zip.com.au>2000-09-05 23:39:48 +0200
committerKevin Ryde <user42@zip.com.au>2000-09-05 23:39:48 +0200
commite01cffd82ff7869b1e4365d9d747f226cec7bf98 (patch)
tree7c2ca5100e9415884375d701c9cc24807f201708 /tune/modlinv.c
parent32da6f015b1a607de104364c8a7ddfc6997438aa (diff)
downloadgmp-e01cffd82ff7869b1e4365d9d747f226cec7bf98.tar.gz
* tune/modlinv.c: New file with alternate modlimb_inverts.
Diffstat (limited to 'tune/modlinv.c')
-rw-r--r--tune/modlinv.c170
1 files changed, 170 insertions, 0 deletions
diff --git a/tune/modlinv.c b/tune/modlinv.c
new file mode 100644
index 000000000..da701c76d
--- /dev/null
+++ b/tune/modlinv.c
@@ -0,0 +1,170 @@
+/* Alternate implementations of modlimb_invert to compare speeds. */
+
+/*
+Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "speed.h"
+
+
+/* Like the standard version in gmp-impl.h, but with the expressions using a
+ "1-" form. This has the same number of steps, but "1-" is on the
+ dependent chain, whereas the "2*" in the standard version isn't.
+ Depending on the CPU this should be the same or a touch slower. */
+
+#if BITS_PER_MP_LIMB <= 32 /* limbs of at most 32 bits: table plus two steps */
+#define modlimb_invert_mul1(inv,n) \
+ do { \
+ mp_limb_t __n = (n); /* evaluate the argument only once */ \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); /* n is required to be odd */ \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 bits, looked up from low byte of n */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 16 bits after first "1-" step */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 32 bits after second "1-" step */ \
+ ASSERT (__inv * __n == 1); /* product must wrap to exactly 1 in limb arithmetic */ \
+ (inv) = __inv; /* store the result in the caller's lvalue */ \
+ } while (0)
+#endif
+
+#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64 /* 33..64 bit limbs: one extra step */
+#define modlimb_invert_mul1(inv,n) \
+ do { \
+ mp_limb_t __n = (n); /* evaluate the argument only once */ \
+ mp_limb_t __inv; \
+ ASSERT ((__n & 1) == 1); /* n is required to be odd */ \
+ __inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 bits, looked up from low byte of n */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 16 bits after first "1-" step */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 32 bits after second "1-" step */ \
+ __inv = (1 - __n * __inv) * __inv + __inv; /* 64 bits after third "1-" step */ \
+ ASSERT (__inv * __n == 1); /* product must wrap to exactly 1 in limb arithmetic */ \
+ (inv) = __inv; /* store the result in the caller's lvalue */ \
+ } while (0)
+#endif
+
+
+/* The loop based version used in GMP 3.0 and earlier.  It clears the set
+   bits of 1-n*inv one at a time, so it is usually slower than multiplying,
+   and much slower when the processor has a good multiply.  n must be odd. */
+
+#define modlimb_invert_loop(inv,n) \
+  do { \
+    mp_limb_t  __v = (n);           /* n << i for the current bit i */ \
+    mp_limb_t  __v_orig = __v;      /* unshifted n, kept for the final check */ \
+    mp_limb_t  __make_zero = 1;     /* 1 - n*__v_inv, driven down to zero */ \
+    mp_limb_t  __two_i = 1;         /* 1 << i, the bit being examined */ \
+    mp_limb_t  __v_inv = 0;         /* inverse accumulated so far */ \
+    /* This macro's copy of the argument is __v; it declares no __n. */ \
+    ASSERT ((__v & 1) == 1); \
+    \
+    do \
+      { \
+        /* Advance to the lowest bit still set in __make_zero. */ \
+        while ((__two_i & __make_zero) == 0) \
+          __two_i <<= 1, __v <<= 1; \
+        __v_inv += __two_i;         /* take 2^i into the inverse ... */ \
+        __make_zero -= __v;         /* ... which subtracts n*2^i here */ \
+      } \
+    while (__make_zero); \
+    ASSERT (__v_orig * __v_inv == 1); \
+    (inv) = __v_inv; \
+  } while (0)
+
+
+/* A bit-at-a-time version using a conditional in each step, but always
+   running the same number of steps (BITS_PER_MP_LIMB-1).  n must be odd. */
+
+#define modlimb_invert_cond(inv,n) \
+  do { \
+    mp_limb_t  __divisor = (n); \
+    /* Starts as the top bit; the shifts below bring it down to bit 0. */ \
+    mp_limb_t  __result = MP_LIMB_T_HIGHBIT; \
+    mp_limb_t  __residue = (1 - __divisor) >> 1; \
+    int        __steps_left = BITS_PER_MP_LIMB-1; \
+    \
+    ASSERT ((__divisor & 1) == 1); \
+    \
+    do \
+      { \
+        __result >>= 1; \
+        if ((__residue & 1) != 0) \
+          { \
+            /* Low bit set: record a 1 bit and cancel it from the residue. */ \
+            __residue -= __divisor; \
+            __result |= MP_LIMB_T_HIGHBIT; \
+          } \
+        __residue >>= 1; \
+      } \
+    while (--__steps_left != 0); \
+    \
+    ASSERT (__result * __divisor == 1); \
+    (inv) = __result; \
+  } while (0)
+
+
+/* The same fixed-length bit-at-a-time scheme, but each step is done with
+   masks and shifts only, so there is no conditional inside the loop.  n
+   must be odd. */
+
+#define modlimb_invert_arith(inv,n) \
+  do { \
+    mp_limb_t  __divisor = (n); \
+    /* Starts as the top bit; the shifts below bring it down to bit 0. */ \
+    mp_limb_t  __result = MP_LIMB_T_HIGHBIT; \
+    mp_limb_t  __residue = (1 - __divisor) >> 1; \
+    int        __steps_left = BITS_PER_MP_LIMB-1; \
+    \
+    ASSERT ((__divisor & 1) == 1); \
+    \
+    do \
+      { \
+        mp_limb_t  __bit = __residue & 1; \
+        mp_limb_t  __mask = -__bit;    /* all ones if __bit is 1, else 0 */ \
+        \
+        __result = (__result >> 1) | (__bit << (BITS_PER_MP_LIMB-1)); \
+        __residue = (__residue - (__divisor & __mask)) >> 1; \
+      } \
+    while (--__steps_left != 0); \
+    \
+    ASSERT (__result * __divisor == 1); \
+    (inv) = __result; \
+  } while (0)
+
+
+/* Speed measurement entry point for modlimb_invert_mul1.  The whole body,
+   presumably including the use of s and the returned double, comes from
+   SPEED_ROUTINE_MODLIMB_INVERT, which is not defined in this file. */
+double
+speed_modlimb_invert_mul1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_mul1);
+}
+/* Speed measurement entry point for modlimb_invert_loop.  The whole body,
+   presumably including the use of s and the returned double, comes from
+   SPEED_ROUTINE_MODLIMB_INVERT, which is not defined in this file. */
+double
+speed_modlimb_invert_loop (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_loop);
+}
+/* Speed measurement entry point for modlimb_invert_cond.  The whole body,
+   presumably including the use of s and the returned double, comes from
+   SPEED_ROUTINE_MODLIMB_INVERT, which is not defined in this file. */
+double
+speed_modlimb_invert_cond (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_cond);
+}
+/* Speed measurement entry point for modlimb_invert_arith.  The whole body,
+   presumably including the use of s and the returned double, comes from
+   SPEED_ROUTINE_MODLIMB_INVERT, which is not defined in this file. */
+double
+speed_modlimb_invert_arith (struct speed_params *s)
+{
+ SPEED_ROUTINE_MODLIMB_INVERT (modlimb_invert_arith);
+}