summaryrefslogtreecommitdiff
path: root/longlong.h
diff options
context:
space:
mode:
Diffstat (limited to 'longlong.h')
-rw-r--r--longlong.h40
1 files changed, 32 insertions, 8 deletions
diff --git a/longlong.h b/longlong.h
index dc8d2dd56..7624d6709 100644
--- a/longlong.h
+++ b/longlong.h
@@ -1058,14 +1058,15 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
: "=r" (sh), "=&r" (sl) \
: "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
"1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
-#if defined (HAVE_MULX)
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulx %3, %0, %1" \
+ __asm__ ("mulx\t%3, %0, %1" \
: "=r" (w0), "=r" (w1) \
: "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
#else
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("mulq %3" \
+ __asm__ ("mulq\t%3" \
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#endif
@@ -1073,21 +1074,44 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
__asm__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
-/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
+
+#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \
+ || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2 \
+ || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen \
+ || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar
+#define count_leading_zeros(count, x) \
+ do { \
+ /* This is lzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be a 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 64
+#else
#define count_leading_zeros(count, x) \
do { \
UDItype __cbtmp; \
ASSERT ((x) != 0); \
- __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
(count) = __cbtmp ^ 63; \
} while (0)
-/* bsfq destination must be a 64-bit register, "%q0" forces this in case
- count is only an int. */
+#endif
+
+#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \
+ || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar
+#define count_trailing_zeros(count, x) \
+ do { \
+ /* This is tzcnt, spelled for older assemblers. Destination and */ \
+ /* source must be a 64-bit registers, hence cast and %q. */ \
+ __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ } while (0)
+#define COUNT_TRAILING_ZEROS_0 64
+#else
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \
- __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
+ __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \
} while (0)
+#endif
#endif /* __amd64__ */
#if defined (__i860__) && W_TYPE_SIZE == 32