author    vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>  2020-06-10 10:39:09 +0000
committer vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>  2020-06-10 10:39:09 +0000
commit    4e9bec80b4d11e4587110805d4e2655cf83ee4e3 (patch)
tree      36fdd1e0cb983a03a052cda6e29c7d52c334401e
parent    9dbd638ae4c484e7faa51d10f238e1c942be7fa5 (diff)
download  mpfr-4e9bec80b4d11e4587110805d4e2655cf83ee4e3.tar.gz
[src/mpfr-longlong.h] Update: applied the diff of GMP's longlong.h
between GMP 6.1.0 and GMP 6.2.0, after reverting changesets 13251, 11042,
11025, and 10727, as these changes are present in the diff.

git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@13962 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r--  src/mpfr-longlong.h | 306
1 file changed, 185 insertions, 121 deletions
diff --git a/src/mpfr-longlong.h b/src/mpfr-longlong.h
index d773b4b53..ce844f99b 100644
--- a/src/mpfr-longlong.h
+++ b/src/mpfr-longlong.h
@@ -200,7 +200,6 @@ https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
(pl) = __m0 * __m1; \
} while (0)
#endif
-#define UMUL_TIME 18
#else /* ! __GNUC__ */
#include <machine/builtins.h>
#define umul_ppmm(ph, pl, m0, m1) \
@@ -218,7 +217,6 @@ https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
} while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
-#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm
@@ -281,7 +279,6 @@ long __MPN(count_leading_zeros) (UDItype);
#include <intrinsics.h>
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
-#define UDIV_TIME 220
long __MPN(count_leading_zeros) (UDItype);
#define count_leading_zeros(count, x) \
((count) = _leadz ((UWtype) (x)))
@@ -325,7 +322,6 @@ long __MPN(count_leading_zeros) (UDItype);
__asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
: "=&f" (ph), "=f" (pl) \
: "f" (m0), "f" (m1))
-#define UMUL_TIME 14
#define count_leading_zeros(count, x) \
do { \
UWtype _x = (x), _y, _a, _c; \
@@ -368,7 +364,6 @@ long __MPN(count_leading_zeros) (UDItype);
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#endif
-#define UDIV_TIME 220
#endif
@@ -414,8 +409,6 @@ long __MPN(count_leading_zeros) (UDItype);
#define COUNT_LEADING_ZEROS_0 32
#endif /* __a29k__ */
-/* MPFR: changed "J" constraint to "Cal" constraint
- (https://sympa.inria.fr/sympa/arc/mpfr/2018-10/msg00010.html) */
#if defined (__arc__)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \
@@ -438,12 +431,50 @@ long __MPN(count_leading_zeros) (UDItype);
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
&& W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ do { \
+ if (__builtin_constant_p (bl) && -(USItype)(bl) < 0x100) \
+ __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), \
+ "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC); \
+ else \
+ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \
+ } while (0)
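
Note (not part of the patch): the new first branch handles a small constant
negative bl by issuing SUBS with -bl instead of ADDS with bl, relying on ARM's
ADDS and SUBS setting the carry flag compatibly for the following ADC. For
orientation, a minimal portable sketch of the contract add_ssaaaa implements,
with illustrative names and types:

#include <stdint.h>

/* (sh,sl) = (ah,al) + (bh,bl): a two-word add with carry propagation. */
static void add_ssaaaa_ref (uint32_t *sh, uint32_t *sl,
                            uint32_t ah, uint32_t al,
                            uint32_t bh, uint32_t bl)
{
  uint32_t lo = al + bl;       /* low word; may wrap around */
  *sh = ah + bh + (lo < al);   /* carry out of the low word iff it wrapped */
  *sl = lo;
}
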
+/* FIXME: Extend the immediate range for the low word by using both ADDS and
+ SUBS, since they set carry in the same way. Note: We need separate
+ definitions for thumb and non-thumb due to the absence of RSC under thumb. */
+#if defined (__thumb__)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \
+ && (ah) == (bh)) \
+ __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \
+ : "=r" (sh), "=r" (sl) \
+ : "r" (al), "rI" (bl) __CLOBBER_CC); \
+ else if (__builtin_constant_p (al)) \
+ __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
+ else if (__builtin_constant_p (bl)) \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+ else \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
+ } while (0)
+#else
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
- if (__builtin_constant_p (al)) \
+ if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \
+ && (ah) == (bh)) \
+ __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \
+ : "=r" (sh), "=r" (sl) \
+ : "r" (al), "rI" (bl) __CLOBBER_CC); \
+ else if (__builtin_constant_p (al)) \
{ \
if (__builtin_constant_p (ah)) \
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
@@ -466,21 +497,15 @@ long __MPN(count_leading_zeros) (UDItype);
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
} \
else if (__builtin_constant_p (bl)) \
- { \
- if (__builtin_constant_p (bh)) \
- __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
- else \
- __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
- : "=r" (sh), "=&r" (sl) \
- : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
- } \
+ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
else /* only bh might be a constant */ \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
+ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
} while (0)
+#endif
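
Note: the new (ah) == (bh) fast path works because equal high words cancel, so
the high result is just 0 minus the borrow, and "sbc %0,%0,%0" materializes 0
or ~0 directly from the carry flag without needing ah or bh in registers at
all. A hedged reference for the general contract, with illustrative names:

#include <stdint.h>

/* (sh,sl) = (ah,al) - (bh,bl): a two-word subtract with borrow. */
static void sub_ddmmss_ref (uint32_t *sh, uint32_t *sl,
                            uint32_t ah, uint32_t al,
                            uint32_t bh, uint32_t bl)
{
  *sl = al - bl;
  *sh = ah - bh - (al < bl);   /* borrow out of the low word */
}
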
#if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \
|| defined (__ARM_ARCH_3__)
#define umul_ppmm(xh, xl, a, b) \
@@ -503,18 +528,17 @@ long __MPN(count_leading_zeros) (UDItype);
"=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
: "r" ((USItype) (a)), "r" ((USItype) (b)) __CLOBBER_CC); \
} while (0)
-#define UMUL_TIME 20
+#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UWtype __r; \
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
-#define UDIV_TIME 200
+#endif /* LONGLONG_STANDALONE */
#else /* ARMv4 or newer */
#define umul_ppmm(xh, xl, a, b) \
__asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
-#define UMUL_TIME 5
#define smul_ppmm(xh, xl, a, b) \
__asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#ifndef LONGLONG_STANDALONE
@@ -525,36 +549,60 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
} while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
-#define UDIV_TIME 70
#endif /* LONGLONG_STANDALONE */
#endif /* defined(__ARM_ARCH_2__) ... */
#define count_leading_zeros(count, x) count_leading_zeros_gcc_clz(count, x)
#define count_trailing_zeros(count, x) count_trailing_zeros_gcc_ctz(count, x)
-#define COUNT_LEADING_ZEROS_0 32
#endif /* __arm__ */
#if defined (__aarch64__) && W_TYPE_SIZE == 64
/* FIXME: Extend the immediate range for the low word by using both
ADDS and SUBS, since they set carry in the same way. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
- __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
- : "=r" (sh), "=&r" (sl) \
- : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
+ do { \
+ if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
+ __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
+ "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
+ else \
+ __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
+ } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
- __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
- : "=r,r" (sh), "=&r,&r" (sl) \
- : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
- "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC)
+ do { \
+ if (__builtin_constant_p (bl) && -(UDItype)(bl) < 0x1000) \
+ __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
+ : "=r,r" (sh), "=&r,&r" (sl) \
+ : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
+ "r,Z" ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
+ else \
+ __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
+ : "=r,r" (sh), "=&r,&r" (sl) \
+ : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \
+ "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC);\
+ } while (0)
+#if __GMP_GNUC_PREREQ (4,9)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
+ __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \
+ w1 = __ll >> 64; \
+ w0 = __ll; \
+ } while (0)
+#endif
+#if !defined (umul_ppmm)
#define umul_ppmm(ph, pl, m0, m1) \
do { \
UDItype __m0 = (m0), __m1 = (m1); \
__asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1)); \
(pl) = __m0 * __m1; \
} while (0)
+#endif
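
Note: for GCC >= 4.9 the TImode path lets the compiler emit the mul/umulh pair
itself; the asm fallback computes the same split product explicitly. A minimal
sketch of the shared contract, assuming a compiler with unsigned __int128:

#include <stdint.h>

/* (w1,w0) = the full 128-bit product of u and v, split into halves. */
static void umul_ppmm_ref (uint64_t *w1, uint64_t *w0,
                           uint64_t u, uint64_t v)
{
  unsigned __int128 p = (unsigned __int128) u * v;
  *w1 = (uint64_t) (p >> 64);   /* high half, what umulh returns */
  *w0 = (uint64_t) p;           /* low half, an ordinary 64-bit multiply */
}
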
#define count_leading_zeros(count, x) count_leading_zeros_gcc_clz(count, x)
#define count_trailing_zeros(count, x) count_trailing_zeros_gcc_ctz(count, x)
-#define COUNT_LEADING_ZEROS_0 64
#endif /* __aarch64__ */
#if defined (__clipper__) && W_TYPE_SIZE == 32
@@ -646,11 +694,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
(wh) = __x.__i.__h; \
(wl) = __x.__i.__l; \
} while (0)
-#define UMUL_TIME 8
-#define UDIV_TIME 60
-#else
-#define UMUL_TIME 40
-#define UDIV_TIME 80
#endif
#define count_leading_zeros(count, x) \
do { \
@@ -936,7 +979,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
double d; \
unsigned a[2]; \
} __u; \
- ASSERT ((n) != 0); \
__u.d = (UWtype) (n); \
(c) = 0x3FF + 31 - (__u.a[1] >> 20); \
} while (0)
@@ -993,12 +1035,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
#endif /* ! pentium */
-#ifndef UMUL_TIME
-#define UMUL_TIME 10
-#endif
-#ifndef UDIV_TIME
-#define UDIV_TIME 40
-#endif
#endif /* 80x86 */
#if defined (__amd64__) && W_TYPE_SIZE == 64
@@ -1012,29 +1048,61 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
"1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+#if X86_ASM_MULX \
+ && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \
+ || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen)
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulx\t%3, %0, %1" \
+ : "=r" (w0), "=r" (w1) \
+ : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#else
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulq %3" \
+ __asm__ ("mulq\t%3" \
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#endif
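
Note: MULX reads one source implicitly from %rdx (hence the "d" constraint),
writes two explicit destinations, and does not touch the flags, which is why
it is selected only on CPU families where it is known to be fast; both forms
satisfy the same contract. A hypothetical self-check, assuming the macro above
and unsigned __int128:

#include <stdint.h>

/* Returns 1 iff umul_ppmm agrees with the compiler's 128-bit product. */
static int umul_ppmm_check (uint64_t u, uint64_t v)
{
  uint64_t w1, w0;
  umul_ppmm (w1, w0, u, v);
  unsigned __int128 p = (unsigned __int128) u * v;
  return w1 == (uint64_t) (p >> 64) && w0 == (uint64_t) p;
}
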
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
__asm__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2 \
+ || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen \
+ || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x) \
+ do { \
+ /* This is lzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
#define count_leading_zeros(count, x) \
do { \
UDItype __cbtmp; \
ASSERT ((x) != 0); \
- __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
(count) = __cbtmp ^ 63; \
} while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
- count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+ || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x) \
+ do { \
+ /* This is tzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \
- __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
} while (0)
+#endif
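
Note: "rep;bsr" and "rep;bsf" are the byte encodings of LZCNT and TZCNT; on
CPUs that lack those instructions the REP prefix is ignored and plain BSR/BSF
executes instead, with different semantics (undefined for an input of 0, and
BSR yields a bit index, not a zero count), hence the gating on specific CPU
families. Reference semantics in portable C:

#include <stdint.h>

static int clz64_ref (uint64_t x)   /* lzcnt semantics: 64 when x == 0 */
{
  int c = 0;
  for (uint64_t bit = (uint64_t) 1 << 63; bit != 0 && !(x & bit); bit >>= 1)
    c++;
  return c;
}

static int ctz64_ref (uint64_t x)   /* tzcnt semantics: 64 when x == 0 */
{
  if (x == 0)
    return 64;
  int c = 0;
  for (; !(x & 1); x >>= 1)
    c++;
  return c;
}
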
#endif /* __amd64__ */
#if defined (__i860__) && W_TYPE_SIZE == 32
@@ -1115,12 +1183,10 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("mulu%.l %3,%1:%0" \
: "=d" (w0), "=d" (w1) \
: "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
-#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("divu%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
-#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("divs%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
@@ -1154,8 +1220,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
} while (0)
-#define UMUL_TIME 100
-#define UDIV_TIME 400
#endif /* not mc68020 */
/* The '020, '030, '040 and '060 have bitfield insns.
GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
@@ -1208,11 +1272,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("divu.d %0,%1,%2" \
: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
-#define UMUL_TIME 5
-#define UDIV_TIME 25
-#else
-#define UMUL_TIME 17
-#define UDIV_TIME 150
#endif /* __m88110__ */
#endif /* __m88000__ */
@@ -1234,12 +1293,18 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
: "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
#endif
-#define UMUL_TIME 10
-#define UDIV_TIME 100
#endif /* __mips */
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if __GMP_GNUC_PREREQ (4,4)
+#if defined (_MIPS_ARCH_MIPS64R6)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UDItype __m0 = (u), __m1 = (v); \
+ (w0) = __m0 * __m1; \
+ __asm__ ("dmuhu\t%0, %1, %2" : "=d" (w1) : "d" (__m0), "d" (__m1)); \
+ } while (0)
+#endif
+#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (4,4)
#define umul_ppmm(w1, w0, u, v) \
do { \
typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
@@ -1260,8 +1325,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
: "=d" (w0), "=d" (w1) \
: "d" ((UDItype)(u)), "d" ((UDItype)(v)))
#endif
-#define UMUL_TIME 20
-#define UDIV_TIME 140
#endif /* __mips */
#if defined (__mmix__) && W_TYPE_SIZE == 64
@@ -1326,33 +1389,41 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
else \
__asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
- : "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "%r" (al), "rI" (bl) \
+ __CLOBBER_CC); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
- : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
else \
__asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
- : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl) \
+ __CLOBBER_CC); \
} while (0)
#define count_leading_zeros(count, x) \
__asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
@@ -1374,23 +1445,17 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
(pl) = __m0 * __m1; \
} while (0)
#endif
-#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
do { \
SItype __m0 = (m0), __m1 = (m1); \
__asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
(pl) = __m0 * __m1; \
} while (0)
-#define SMUL_TIME 14
-#define UDIV_TIME 120
#else
-#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
-#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
__asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
-#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */
@@ -1406,17 +1471,20 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else \
__asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "%r" ((UDItype)(al)), "rI" ((UDItype)(bl))); \
+ "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
} while (0)
/* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
This might seem strange, but gcc folds away the dead code late. */
@@ -1427,60 +1495,70 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
else \
__asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl)))); \
+ "rI" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \
+ __CLOBBER_CC); \
} else { \
if (__builtin_constant_p (ah) && (ah) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
__asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
else \
__asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \
- "rI" ((UDItype)(al)), "r" ((UDItype)(bl))); \
+ "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \
+ __CLOBBER_CC); \
} \
} while (0)
#endif /* ! _LONG_LONG_LIMB */
#define count_leading_zeros(count, x) \
__asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
-#if 0 && __GMP_GNUC_PREREQ (4,4) /* Disable, this results in libcalls! */
+#if __GMP_GNUC_PREREQ (4,8)
#define umul_ppmm(w1, w0, u, v) \
do { \
typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \
@@ -1497,15 +1575,12 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
(pl) = __m0 * __m1; \
} while (0)
#endif
-#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
do { \
DItype __m0 = (m0), __m1 = (m1); \
__asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
(pl) = __m0 * __m1; \
} while (0)
-#define SMUL_TIME 14 /* ??? */
-#define UDIV_TIME 120 /* ??? */
#endif /* 64-bit PowerPC. */
#if defined (__pyr__) && W_TYPE_SIZE == 32
@@ -1566,8 +1641,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
: "=r" (ph), "=r" (pl) \
: "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
: "r2")
-#define UMUL_TIME 20
-#define UDIV_TIME 200
#define count_leading_zeros(count, x) \
do { \
if ((x) >= 0x10000) \
@@ -1582,11 +1655,19 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
} while (0)
#endif /* RT/ROMP */
+#if defined (__riscv64) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, u, v) \
+ do { \
+ UDItype __u = (u), __v = (v); \
+ (pl) = __u * __v; \
+ __asm__ ("mulhu\t%2, %1, %0" : "=r" (ph) : "%r" (__u), "r" (__v)); \
+ } while (0)
+#endif
+
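Note: in GCC extended asm, outputs are numbered before inputs, so in the
RISC-V macro %0 is ph, %1 is __u, and %2 is __v; MULHU is destination-first
and supplies the high 64 bits, while an ordinary C multiply supplies the low
half. A small worked usage, with hypothetical values:

#include <stdint.h>

static void umul_ppmm_demo (void)
{
  uint64_t ph, pl;
  /* 3 * 2^63 == 2^64 + 2^63: high word 1, low word 2^63. */
  umul_ppmm (ph, pl, (uint64_t) 1 << 63, (uint64_t) 3);
  /* here ph == 1 and pl == (uint64_t) 1 << 63 */
  (void) ph; (void) pl;
}
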
#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
-#define UMUL_TIME 5
#endif
#if defined (__sparc__) && W_TYPE_SIZE == 32
@@ -1634,10 +1715,8 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
but INTERPRETED AS UNSIGNED. */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
-#define UMUL_TIME 5
#if HAVE_HOST_CPU_supersparc
-#define UDIV_TIME 60 /* SuperSPARC timing */
#else
/* Don't use this on SuperSPARC because its udiv only handles 53 bit
dividends and will trap to the kernel for the rest. */
@@ -1649,7 +1728,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
(r) = (n0) - __q * (d); \
(q) = __q; \
} while (0)
-#define UDIV_TIME 25
#endif /* HAVE_HOST_CPU_supersparc */
#else /* ! __sparc_v8__ */
@@ -1658,7 +1736,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
instructions scan (ffs from high bit) and divscc. */
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
-#define UMUL_TIME 5
#define udiv_qrnnd(q, r, n1, n0, d) \
__asm__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
@@ -1701,7 +1778,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
"1: ! End of inline udiv_qrnnd" \
: "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
: "%g1" __AND_CLOBBER_CC)
-#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
__asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
/* Early sparclites return 63 for an argument of 0, but they warn that future
@@ -1755,7 +1831,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
" rd %%y,%1" \
: "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
: "%g1", "%g2" __AND_CLOBBER_CC)
-#define UMUL_TIME 39 /* 39 instructions */
#endif
#ifndef udiv_qrnnd
#ifndef LONGLONG_STANDALONE
@@ -1765,9 +1840,6 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
(r) = __r; \
} while (0)
extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
-#ifndef UDIV_TIME
-#define UDIV_TIME 140
-#endif
#endif /* LONGLONG_STANDALONE */
#endif /* udiv_qrnnd */
#endif /* __sparc__ */
@@ -2102,7 +2174,8 @@ extern __longlong_h_C UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *)
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
__udiv_w_sdiv (defined in libgcc or elsewhere). */
-#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) && !defined(LONGLONG_STANDALONE)
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) \
+ && ! defined (LONGLONG_STANDALONE)
#define udiv_qrnnd(q, r, nh, nl, d) \
do { \
UWtype __r; \
@@ -2202,12 +2275,3 @@ extern const unsigned char __MPFR_DECLSPEC __clz_tab[129];
#ifndef UDIV_PREINV_ALWAYS
#define UDIV_PREINV_ALWAYS 0
#endif
-
-/* Give defaults for UMUL_TIME and UDIV_TIME. */
-#ifndef UMUL_TIME
-#define UMUL_TIME 1
-#endif
-
-#ifndef UDIV_TIME
-#define UDIV_TIME UMUL_TIME
-#endif