author     Kevin Ryde <user42@zip.com.au>   2004-04-24 02:28:52 +0200
committer  Kevin Ryde <user42@zip.com.au>   2004-04-24 02:28:52 +0200
commit     a5c63296e6bf536c2d4e0efe5bc0ccdc695202e6 (patch)
tree       13e3574e9f26231849264fb9060dd44dafe4d88d /longlong.h
parent     27744cc007127d0e5604e1fce6b032e00466f897 (diff)
* longlong.h (count_leading_zeros_gcc_clz, count_trailing_zeros_gcc_ctz):
New macros.
(count_leading_zeros, count_trailing_zeros) [x86]: Use them on gcc 3.4.
Diffstat (limited to 'longlong.h')
-rw-r--r--   longlong.h   68
1 files changed, 65 insertions, 3 deletions
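
Illustration only, not part of the commit: the patch gates the new macros on
__GMP_GNUC_PREREQ (3,4), i.e. "gcc 3.4 or later".  Below is a small sketch of
that kind of compiler-version test; the name GCC_PREREQ_SKETCH is made up
here, and GMP's real __GMP_GNUC_PREREQ is defined elsewhere and may differ in
detail.

#include <stdio.h>

/* Conventional gcc version check: true when the compiler is gcc maj.min
   or newer.  Non-gcc compilers (no __GNUC__) always fail the test.  */
#if defined (__GNUC__) && defined (__GNUC_MINOR__)
#define GCC_PREREQ_SKETCH(maj,min) \
  (__GNUC__ > (maj) || (__GNUC__ == (maj) && __GNUC_MINOR__ >= (min)))
#else
#define GCC_PREREQ_SKETCH(maj,min)  0
#endif

int
main (void)
{
#if GCC_PREREQ_SKETCH (3, 4)
  puts ("gcc >= 3.4: the __builtin_clz/__builtin_ctz macros would be used");
#else
  puts ("older gcc or non-gcc: the asm or generic fallbacks would be used");
#endif
  return 0;
}
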
diff --git a/longlong.h b/longlong.h
index 1b1eb3bb8..38ce90774 100644
--- a/longlong.h
+++ b/longlong.h
@@ -119,6 +119,54 @@ MA 02111-1307, USA. */
Please add support for more CPUs here, or improve the current support
for the CPUs below! */
+
+/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc
+ 3.4 __builtin_clzl or __builtin_clzll, according to our limb size.
+ Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or
+ __builtin_ctzll.
+
+ These builtins are only used where we have checked what code comes out;
+ on some chips they are merely libgcc calls, and in that case we will
+ instead want an inline (either asm or generic C).
+
+ These builtins are better than an asm block of the same insn, since an
+ asm block doesn't give gcc any information about scheduling or resource
+ usage. We keep an asm block for use on prior versions of gcc though.
+
+ For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but
+ it's not used (for count_leading_zeros) because it generally gives extra
+ code to ensure the result is 0 when the input is 0, which we don't need
+ or want. */
+
+#ifdef _LONG_LONG_LIMB
+#define count_leading_zeros_gcc_clz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_clzll (x); \
+ } while (0)
+#else
+#define count_leading_zeros_gcc_clz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_clzl (x); \
+ } while (0)
+#endif
+
+#ifdef _LONG_LONG_LIMB
+#define count_trailing_zeros_gcc_ctz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_ctzll (x); \
+ } while (0)
+#else
+#define count_trailing_zeros_gcc_ctz(count,x) \
+ do { \
+ ASSERT ((x) != 0); \
+ (count) = __builtin_ctzl (x); \
+ } while (0)
+#endif
+
+
/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
don't need to be under !NO_ASM */
#if ! defined (NO_ASM)
@@ -681,12 +729,17 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#endif /* pentiummx */
#else /* ! pentium */
+
+#if __GMP_GNUC_PREREQ (3,4) /* using bsrl */
+#define count_leading_zeros(count,x) count_leading_zeros_gcc_clz(count,x)
+#endif /* gcc clz */
+
/* On P6, gcc prior to 3.0 generates a partial register stall for
__cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the
cost of one extra instruction. Do this for "i386" too, since that means
generic x86. */
-#if __GNUC__ < 3 \
+#if ! defined (count_leading_zeros) && __GNUC__ < 3 \
&& (HAVE_HOST_CPU_i386 \
|| HAVE_HOST_CPU_i686 \
|| HAVE_HOST_CPU_pentiumpro \
@@ -699,7 +752,9 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
__asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
(count) = 31 - __cbtmp; \
} while (0)
-#else
+#endif /* gcc<3 asm bsrl */
+
+#ifndef count_leading_zeros
#define count_leading_zeros(count, x) \
do { \
USItype __cbtmp; \
@@ -707,13 +762,20 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
__asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
(count) = __cbtmp ^ 31; \
} while (0)
-#endif
+#endif /* asm bsrl */
+
+#if __GMP_GNUC_PREREQ (3,4) /* using bsfl */
+#define count_trailing_zeros(count,x) count_trailing_zeros_gcc_ctz(count,x)
+#endif /* gcc ctz */
+#ifndef count_trailing_zeros
#define count_trailing_zeros(count, x) \
do { \
ASSERT ((x) != 0); \
__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \
} while (0)
+#endif /* asm bsfl */
+
#endif /* ! pentium */
#ifndef UMUL_TIME
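
Illustration only, not part of the commit: a minimal standalone sketch of
what the new macros compute, assuming gcc >= 3.4 and a limb that fits in an
unsigned long (so _LONG_LONG_LIMB is not defined and the __builtin_clzl /
__builtin_ctzl variants are selected).  The main() harness and the ASSERT
stub below are hypothetical; in GMP, ASSERT is its own internal macro.

#include <assert.h>
#include <stdio.h>

#define ASSERT(x)  assert (x)

/* Same shape as the count_leading_zeros_gcc_clz /
   count_trailing_zeros_gcc_ctz macros added by the patch, in their
   unsigned long form.  Both require a non-zero operand.  */
#define clz_sketch(count,x)          \
  do {                               \
    ASSERT ((x) != 0);               \
    (count) = __builtin_clzl (x);    \
  } while (0)

#define ctz_sketch(count,x)          \
  do {                               \
    ASSERT ((x) != 0);               \
    (count) = __builtin_ctzl (x);    \
  } while (0)

int
main (void)
{
  unsigned long  x = 0x10000UL;   /* only bit 16 set */
  int  lz, tz;

  clz_sketch (lz, x);
  ctz_sketch (tz, x);

  /* With a 32-bit unsigned long this prints lz=15 tz=16, the same values
     the bsrl/bsfl asm fallbacks produce (31 - bsr result, and the bsf
     result); with a 64-bit unsigned long, lz is 47 instead.  */
  printf ("lz=%d tz=%d\n", lz, tz);
  return 0;
}
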