diff options
author | Kevin Ryde <user42@zip.com.au> | 2001-06-12 00:10:16 +0200 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2001-06-12 00:10:16 +0200 |
commit | 9db31ead5fdcae4dfd83b85aa28e32945797848c (patch) | |
tree | c5b5b5fa446729e4b6bb9087700fdf2c2406a81b /longlong.h | |
parent | 9bd299a2dbd9ab9a9f3fffa1d755a5fdbab8d972 (diff) | |
download | gmp-9db31ead5fdcae4dfd83b85aa28e32945797848c.tar.gz |
* longlong.h (count_leading_zeros) [pentiumpro]: Work around a partial
register stall on gcc < 3.
Diffstat (limited to 'longlong.h')
-rw-r--r-- | longlong.h | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/longlong.h b/longlong.h
index 0b33bd4a0..fb49f7014 100644
--- a/longlong.h
+++ b/longlong.h
@@ -615,6 +615,22 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
    depending where the least significant 1 bit is. */

 #else
+/* gcc on p6 prior to 3.0 generates a partial register stall for __cbtmp^31,
+   due to using "xorb $31" instead of "xorl $31", the former being 1 code
+   byte smaller. "31-__cbtmp" is a workaround, probably at the cost of one
+   extra instruction. Do this for "i386" too, since that means generic
+   x86. */
+#if __GNUC__ < 3 \
+  && (HAVE_HOST_CPU_i386 || HAVE_HOST_CPU_pentiumpro \
+      || HAVE_HOST_CPU_pentium2 || HAVE_HOST_CPU_pentium3)
+#define count_leading_zeros(count, x) \
+  do { \
+    USItype __cbtmp; \
+    ASSERT ((x) != 0); \
+    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+    (count) = 31 - __cbtmp; \
+  } while (0)
+#else
 #define count_leading_zeros(count, x) \
   do { \
     USItype __cbtmp; \
@@ -622,6 +638,8 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
     __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
     (count) = __cbtmp ^ 31; \
   } while (0)
+#endif \
+ \
 #define count_trailing_zeros(count, x) \
   do { \
     ASSERT ((x) != 0); \