diff options
| | | |
|---|---|---|
| author | Kevin Ryde <user42@zip.com.au> | 2001-07-17 02:03:23 +0200 |
| committer | Kevin Ryde <user42@zip.com.au> | 2001-07-17 02:03:23 +0200 |
| commit | 6ba5d25ccbdc1e9a7a9d0ed197b157371d1b81c6 (patch) | |
| tree | 2d4f927217e9710258d63c6da6015927dc192e92 /gmp-h.in | |
| parent | 7b3d163c61274a6a3836f9b89be8f33ec0dbbdec (diff) | |
| download | gmp-6ba5d25ccbdc1e9a7a9d0ed197b157371d1b81c6.tar.gz | |
* gmp-h.in (__GMPN_AORS_1): Remove x86 and gcc versions, leave just
one version.
(__GMPN_ADD, __GMPN_SUB): New macros, rewrite of mpn_add and mpn_sub.
(mpn_add, mpn_sub): Use them.
(__GMPN_COPY_REST): New macro.
* gmp-h.in, gmp-impl.h, acinclude.m4: Remove __GMP_ASM_L and
__GMP_LSYM_PREFIX, revert to ASM_L in gmp-impl.h and AC_DEFINE of
LSYM_PREFIX.
Diffstat (limited to 'gmp-h.in')
-rw-r--r-- | gmp-h.in | 483 |
1 files changed, 118 insertions, 365 deletions
@@ -23,17 +23,9 @@ MA 02111-1307, USA. */ #ifndef __GMP_H__ -/* Instantiated by configure, for internal use only. - - LSYM_PREFIX depends on the assembler in use and is therefore not compiler - independent, but it's only used for i386 gcc and it seems reasonable to - assume that if gcc is used by an application it will have been used to - build libgmp so we'll have the right setting. Also in reality L or .L - are the candidates, and there's a good chance both will work anyway. */ - +/* Instantiated by configure, for internal use only. */ #if ! __GMP_WITHIN_CONFIGURE #define __GMP_BITS_PER_MP_LIMB @__GMP_BITS_PER_MP_LIMB@ -#define __GMP_LSYM_PREFIX @__GMP_LSYM_PREFIX@ #endif @@ -1372,342 +1364,110 @@ mpf_size (mpf_srcptr f) /**************** mpn inlines ****************/ -/* __GMP_ASM_L gives a local label for a gcc asm block, for use when - temporary local labels like "1:" might not be available, which is the - case for instance on the x86s (the SCO assembler doesn't support them). - - The label generated is made unique by including "%=" which is a unique - number for each insn. This ensures the same name can be used in multiple - asm blocks, perhaps via a macro. Since jumps between asm blocks are not - allowed there's no need for a label to be usable outside a single - block. */ - -#define __GMP_ASM_L(name) __GMP_LSYM_PREFIX "asm_%=_" #name - - -/* The following x86 stuff gives better flags handling than the generic C, - usually saving a register, some code size and a cycle or two. The - emphasis is on something compact and sensible. - - The src!=dst / n!=1 case has separate code for add and sub since the - former can merge cout and n and use a load/add to save one instruction. - Whether that's significant overall is debatable. - - The sbbl to establish cout should be reasonable on all x86s. If it's - about to be added to something then we miss the opportunity to do "adcl - $0, var", but that's inevitable since the carry flag can't be an output. 
+/* The comments with __GMPN_ADD_1 below apply here too. - This code isn't used for egcs 2.91 since __GMPN_ADD_1 tickles some - reloader bugs (seen in 2.91.66 19990314 on redhat 6.1). + The test for FUNCTION returning 0 should predict well. If it's assumed + {yp,ysize} will usually have a random number of bits then the high limb + won't be full and a carry out will occur a good deal less than 50% of the + time. - Possibilities: + ysize==0 isn't a documented feature, but is used internally in a few + places. - An alternative to sbbl would be setc and let gcc convert QI to SI if - necessary, but on P54 that's a cycle slower, and on P6 it's not clear - whether gcc 2.95.x properly understands partial register stalls. + Producing cout last stops it using up a register during the main part of + the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))" + doesn't seem able to move the true and false legs of the conditional up + to the two places cout is generated. */ - On chips with a good clc, that could be used and the loops driven with a - single jnbe instead of two jumps, to perhaps save a BTB entry. The final - cout=-cout would become cout++. */ - -#if defined (__GNUC__) && ! (__GNUC__ == 2 && __GNUC_MINOR__ == 91) \ - && (defined (__i386__) || defined (__i486__)) \ - && __GMP_BITS_PER_MP_LIMB == 32 && ! 
defined (NO_ASM) - -#define __GMPN_AORS_1_INPLACE(cout, ptr, size, n, aors) \ - do { \ - mp_ptr __dummy1; \ - mp_size_t __dummy2; \ - \ - if (__builtin_constant_p (n) && (n) == 1) \ - { \ - __asm__ __volatile__ \ - (__GMP_ASM_L(top) ":\n" \ - aors " $1, (%1)\n" \ - " jnc " __GMP_ASM_L(done) "\n" \ - " leal 4(%1), %1\n" \ - " decl %2\n" \ - " jnz " __GMP_ASM_L(top) "\n" \ - __GMP_ASM_L(done) ":\n" \ - " sbbl %0, %0\n" \ - : "=r" (cout), \ - "=&r" (__dummy1), \ - "=&rm" (__dummy2) \ - : "1" (ptr), \ - "2" (size) \ - : "memory"); \ - } \ - else \ - { \ - __asm__ __volatile__ \ - ( aors " %5, (%1)\n" \ - " jnc " __GMP_ASM_L(done) "\n" \ - __GMP_ASM_L(top) ":\n" \ - " leal 4(%1), %1\n" \ - " decl %2\n" \ - " jz " __GMP_ASM_L(done) "\n" \ - aors " $1, (%1)\n" \ - " jc " __GMP_ASM_L(top) "\n" \ - __GMP_ASM_L(done) ":\n" \ - " sbbl %0, %0\n" \ - : "=r" (cout), \ - "=&r" (__dummy1), \ - "=&rm" (__dummy2) \ - : "1" (ptr), \ - "2" (size), \ - "ri" (n) \ - : "memory"); \ - } \ - (cout) = -(cout); \ - \ - } while (0) - -#define __GMPN_AORS_1_GENERAL_ONE(cout, dst, src, size, aors) \ - do { \ - mp_ptr __dst; \ - mp_srcptr __src; \ - mp_size_t __size; \ - \ - __asm__ __volatile__ \ - (__GMP_ASM_L(top) ":\n" \ - " movl (%2), %0\n" \ - " addl $4, %2\n" \ - aors " $1, %0\n" \ - " movl %0, (%1)\n" \ - " leal 4(%1), %1\n" \ - " decl %3\n" \ - " jz " __GMP_ASM_L(done) "\n" \ - " jc " __GMP_ASM_L(top) "\n" \ - __GMP_ASM_L(done) ":\n" \ - " sbbl %0, %0\n" \ - : "=&r" (cout), \ - "=&r" (__dst), \ - "=&r" (__src), \ - "=&rm" (__size) \ - : "1" (dst), \ - "2" (src), \ - "3" (size) \ - : "memory"); \ - \ - (cout) = -(cout); \ - if (__src != __dst) \ - __GMPN_COPY (__dst, __src, __size); \ - \ - } while (0) - -#define __GMPN_ADD_1(cout, dst, src, size, n) \ +#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST) \ do { \ - /* ASSERT ((size) >= 1); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ - \ - if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \ 
- { \ - __GMPN_AORS_1_INPLACE (cout, dst, size, n, "addl"); \ - } \ - else \ - { \ - if (__builtin_constant_p (n) && (n) == 1) \ - { \ - __GMPN_AORS_1_GENERAL_ONE (cout, dst, src, size, "addl"); \ - } \ - else \ - { \ - mp_ptr __dst; \ - mp_srcptr __src; \ - mp_size_t __size; \ - \ - __asm__ __volatile__ \ - (__GMP_ASM_L(top) ":\n" \ - " addl (%2), %0\n" \ - " leal 4(%2), %2\n" \ - " movl %0, (%1)\n" \ - " leal 4(%1), %1\n" \ - " movl $1, %0\n" \ - " decl %3\n" \ - " jz " __GMP_ASM_L(done) "\n" \ - " jc " __GMP_ASM_L(top) "\n" \ - __GMP_ASM_L(done) ":\n" \ - " sbbl %0, %0\n" \ - : "=&r" (cout), \ - "=&r" (__dst), \ - "=&r" (__src), \ - "=&rm" (__size) \ - : "1" (dst), \ - "2" (src), \ - "3" (size), \ - "0" (n) \ - : "memory"); \ + mp_size_t __i; \ + mp_limb_t __x; \ \ - (cout) = -(cout); \ - if (__src != __dst) \ - __GMPN_COPY (__dst, __src, __size); \ - } \ - } \ - } while (0) - -#define __GMPN_SUB_1(cout, dst, src, size, n) \ - do { \ - /* ASSERT ((size) >= 1); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ + /* ASSERT ((ysize) >= 0); */ \ + /* ASSERT ((xsize) >= (ysize)); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */ \ + /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */ \ \ - if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \ + __i = (ysize); \ + if (__i != 0) \ { \ - __GMPN_AORS_1_INPLACE (cout, dst, size, n, "subl"); \ - } \ - else \ - { \ - if (__builtin_constant_p (n) && (n) == 1) \ - { \ - __GMPN_AORS_1_GENERAL_ONE (cout, dst, src, size, "subl"); \ - } \ - else \ + if (FUNCTION (wp, xp, yp, __i)) \ { \ - mp_ptr __dst; \ - mp_srcptr __src; \ - mp_size_t __size; \ - mp_limb_t __dummy; \ - \ - __asm__ __volatile__ \ - (__GMP_ASM_L(top) ":\n" \ - " movl (%2), %0\n" \ - " addl $4, %2\n" \ - " subl %4, %0\n" \ - " movl %0, (%1)\n" \ - " leal 4(%1), %1\n" \ - " movl $1, %4\n" \ - " decl %3\n" \ - " jz " __GMP_ASM_L(done) "\n" \ - " jc " __GMP_ASM_L(top) "\n" \ - __GMP_ASM_L(done) ":\n" \ - " sbbl 
%0, %0\n" \ - : "=&r" (cout), \ - "=&r" (__dst), \ - "=&r" (__src), \ - "=&rm" (__size), \ - "=&rm" (__dummy) \ - : "1" (dst), \ - "2" (src), \ - "3" (size), \ - "4" (n) \ - : "memory"); \ - \ - (cout) = -(cout); \ - if (__src != __dst) \ - __GMPN_COPY (__dst, __src, __size); \ + do \ + { \ + if (__i >= (xsize)) \ + { \ + (cout) = 1; \ + goto __done; \ + } \ + __x = (xp)[__i]; \ + } \ + while (TEST); \ } \ } \ + if ((wp) != (xp)) \ + __GMPN_COPY_REST (wp, xp, xsize, __i); \ + (cout) = 0; \ + __done: \ + ; \ } while (0) -#endif +#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize) \ + __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n, \ + (((wp)[__i++] = __x + 1) == 0)) +#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize) \ + __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n, \ + (((wp)[__i++] = __x - 1), __x == 0)) -/* The special cases here for src==dst known at compile time are because gcc - (as of 2.95.4) doesn't recognise __src and __dst are identical when - initialized from the same expression, perhaps because they get - incremented (despite being incremented together). The main saving from - this is not needing the __GMPN_COPY code. - The use of __n is designed to keep the code size down. On the second and - subsequent limbs updated it's only a carry of 1 being propagated and that - could be separate code, but it seems better to share one piece of - load/add/store/test since on random data only 1 or 2 limbs normally need - to be touched. +/* The use of __i indexing is designed to ensure a compile time src==dst + remains nice and clear to the compiler, so that __GMPN_COPY_REST can + disappear, and the load/add/store gets a chance to become a + read-modify-write on CISC CPUs. - For constant n==1, __n optimizes down to a constant 1, which saves a few - bytes of code. + The use of __n is designed to keep code size down by sharing the + load/add/store for the first limb and subsequent carry propagation. 
+ There's also a good chance a compile time n==1 input will be recognised + as making __n a constant 1, certainly gcc recognises this. - In __GMPN_SUB_1, constant n==1 might be better off with the "no-borrow" - test as "(dst = src - 1) != MP_LIMB_T_MAX" rather than the current "(dst - = src - 1) <= src", but the difference between the two ought to be - minimal, and in any case ideally gcc would recognise both as checking - unsigned underflow. */ + Alternatives: -#if defined (__GNUC__) && ! defined (__GMPN_ADD_1) -#define __GMPN_AORS_1(cout, dst, src, size, n, TEST) \ - do { \ - mp_ptr __dst = (dst); \ - mp_size_t __size = (size); \ - mp_limb_t __n = (n); \ - mp_limb_t __x; \ - \ - /* ASSERT ((size) >= 1); */ \ - /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ - \ - if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \ - { \ - (cout) = 0; \ - for (;;) \ - { \ - __x = *__dst; \ - if (TEST) \ - break; \ - __n = 1; \ - if (--__size == 0) \ - { \ - (cout) = 1; \ - break; \ - } \ - } \ - } \ - else \ - { \ - mp_srcptr __src = (src); \ - (cout) = 0; \ - for (;;) \ - { \ - __size--; \ - __x = *__src++; \ - if (TEST) \ - { \ - if (__dst != __src) \ - __GMPN_COPY (__dst, __src, __size); \ - break; \ - } \ - __n = 1; \ - if (__size == 0) \ - { \ - (cout) = 1; \ - break; \ - } \ - } \ - } \ - } while (0) + Using a pair of pointers instead of indexing would be possible, but gcc + isn't able to recognise compile-time src==dst in that case, even when the + pointers are incremented more or less together. Other compilers would + very likely have similar difficulty. -#define __GMPN_ADD_1(cout, dst, src, size, n) \ - __GMPN_AORS_1 (cout, dst, src, size, n, (*__dst++ = __x + __n) >= __n) -#define __GMPN_SUB_1(cout, dst, src, size, n) \ - __GMPN_AORS_1 (cout, dst, src, size, n, (*__dst++ = __x - __n) <= __x) -#endif + gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or + similar to detect a compile-time src==dst. 
This works nicely on gcc + 2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems + to be always false, for a pointer p. But the current code form seems + good enough for src==dst anyway. + gcc on x86 as usual doesn't give particularly good flags handling for the + carry/borrow detection. It's tempting to want some multi instruction asm + blocks to help it, and this was tried, but in truth there's only a few + instructions to save and any gain is all too easily lost by register + juggling setting up for the asm. */ -/* The following is designed to optimize down on non-gcc compilers. The use - of __i ensures a compile time src==dst remains nice and clear, in - particular the __GMPN_COPY will disappear, and the load/add/store gets a - chance to become a read/modify/write on CISC CPUs. The use of __n is as - per the gcc code above and should be recognised as a constant 1 for a - constant n==1. */ - -#ifndef __GMPN_ADD_1 #define __GMPN_AORS_1(cout, dst, src, size, n, TEST) \ do { \ mp_size_t __i; \ - mp_limb_t __n = (n); \ - mp_limb_t __x; \ + mp_limb_t __n, __x; \ \ /* ASSERT ((size) >= 1); */ \ /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ \ (cout) = 0; \ __i = 0; \ + __n = (n); \ for (;;) \ { \ __x = (src)[__i]; \ if (TEST) \ { \ if ((src) != (dst)) \ - { \ - __i++; \ - __GMPN_COPY ((dst)+__i, (src)+__i, (size)-__i); \ - } \ + __GMPN_COPY_REST (dst, src, size, __i+1); \ break; \ } \ __n = 1; \ @@ -1726,7 +1486,6 @@ mpf_size (mpf_srcptr f) #define __GMPN_SUB_1(cout, dst, src, size, n) \ __GMPN_AORS_1(cout, dst, src, size, n, \ ((dst)[__i] = __x - __n) <= __x) -#endif /* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or @@ -1793,21 +1552,53 @@ mpf_size (mpf_srcptr f) } while (0) #endif -/* Enhancement: Use some of the smarter code from gmp-impl.h. Maybe use - mpn_copyi if there's a native version, and if we don't mind demanding - binary compatibility for it (on targets which use it). */ +#if defined (__GMPN_COPY) && ! 
defined (__GMPN_COPY_REST) +#define __GMPN_COPY_REST(dst, src, size, start) \ + do { \ + /* ASSERT ((start) >= 0); */ \ + /* ASSERT ((start) <= (size)); */ \ + __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \ + } while (0) +#endif -#ifndef __GMPN_COPY -#define __GMPN_COPY(dst, src, size) \ +/* Copy {src,size} to {dst,size}, starting at "start". This is designed to + keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1, + __GMPN_ADD, etc. */ +#if ! defined (__GMPN_COPY_REST) +#define __GMPN_COPY_REST(dst, src, size, start) \ do { \ mp_size_t __j; \ /* ASSERT ((size) >= 0); */ \ + /* ASSERT ((start) >= 0); */ \ + /* ASSERT ((start) <= (size)); */ \ /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \ - for (__j = 0; __j < (size); __j++) \ + for (__j = (start); __j < (size); __j++) \ (dst)[__j] = (src)[__j]; \ } while (0) #endif +/* Enhancement: Use some of the smarter code from gmp-impl.h. Maybe use + mpn_copyi if there's a native version, and if we don't mind demanding + binary compatibility for it (on targets which use it). */ + +#if ! defined (__GMPN_COPY) +#define __GMPN_COPY(dst, src, size) __GMPN_COPY_REST (dst, src, size, 0) +#endif + + +#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add +#if ! __GMP_FORCE_mpn_add +__GMP_EXTERN_INLINE +#endif +mp_limb_t +mpn_add (mp_ptr wp, + mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) +{ + mp_limb_t c; + __GMPN_ADD (c, wp, xp, xsize, yp, ysize); + return c; +} +#endif #if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add_1 #if ! __GMP_FORCE_mpn_add_1 @@ -1835,71 +1626,33 @@ mpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size) } #endif -#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub_1 -#if ! __GMP_FORCE_mpn_sub_1 +#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub +#if ! 
__GMP_FORCE_mpn_sub __GMP_EXTERN_INLINE #endif mp_limb_t -mpn_sub_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t n) +mpn_sub (mp_ptr wp, + mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize) { mp_limb_t c; - __GMPN_SUB_1 (c, dst, src, size, n); + __GMPN_SUB (c, wp, xp, xsize, yp, ysize); return c; } #endif - -#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add -#if ! __GMP_FORCE_mpn_add +#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub_1 +#if ! __GMP_FORCE_mpn_sub_1 __GMP_EXTERN_INLINE #endif mp_limb_t -mpn_add (register mp_ptr res_ptr, - register mp_srcptr s1_ptr, - register mp_size_t s1_size, - register mp_srcptr s2_ptr, - register mp_size_t s2_size) +mpn_sub_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t n) { - mp_limb_t cy_limb = 0; - - if (s2_size != 0) - cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size); - - if (s1_size - s2_size != 0) - __GMPN_ADD_1 (cy_limb, - res_ptr + s2_size, - s1_ptr + s2_size, - s1_size - s2_size, - cy_limb); - return cy_limb; + mp_limb_t c; + __GMPN_SUB_1 (c, dst, src, size, n); + return c; } #endif -#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub -#if ! __GMP_FORCE_mpn_sub -__GMP_EXTERN_INLINE -#endif -mp_limb_t -mpn_sub (register mp_ptr res_ptr, - register mp_srcptr s1_ptr, - register mp_size_t s1_size, - register mp_srcptr s2_ptr, - register mp_size_t s2_size) -{ - mp_limb_t cy_limb = 0; - - if (s2_size != 0) - cy_limb = mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size); - - if (s1_size - s2_size != 0) - __GMPN_SUB_1 (cy_limb, - res_ptr + s2_size, - s1_ptr + s2_size, - s1_size - s2_size, - cy_limb); - return cy_limb; -} -#endif #if defined (__cplusplus) |