path: root/gmp-h.in
author    Kevin Ryde <user42@zip.com.au>  2001-07-17 02:03:23 +0200
committer Kevin Ryde <user42@zip.com.au>  2001-07-17 02:03:23 +0200
commit    6ba5d25ccbdc1e9a7a9d0ed197b157371d1b81c6 (patch)
tree      2d4f927217e9710258d63c6da6015927dc192e92 /gmp-h.in
parent    7b3d163c61274a6a3836f9b89be8f33ec0dbbdec (diff)
download  gmp-6ba5d25ccbdc1e9a7a9d0ed197b157371d1b81c6.tar.gz
* gmp-h.in (__GMPN_AORS_1): Remove x86 and gcc versions, leave just
one version.
(__GMPN_ADD, __GMPN_SUB): New macros, rewrite of mpn_add and mpn_sub.
(mpn_add, mpn_sub): Use them.
(__GMPN_COPY_REST): New macro.

* gmp-h.in, gmp-impl.h, acinclude.m4: Remove __GMP_ASM_L and
__GMP_LSYM_PREFIX, revert to ASM_L in gmp-impl.h and AC_DEFINE of
LSYM_PREFIX.
Diffstat (limited to 'gmp-h.in')
-rw-r--r--  gmp-h.in  483
1 files changed, 118 insertions, 365 deletions
diff --git a/gmp-h.in b/gmp-h.in
index 1cd705df6..5c0759b86 100644
--- a/gmp-h.in
+++ b/gmp-h.in
@@ -23,17 +23,9 @@ MA 02111-1307, USA. */
#ifndef __GMP_H__
-/* Instantiated by configure, for internal use only.
-
- LSYM_PREFIX depends on the assembler in use and is therefore not compiler
- independent, but it's only used for i386 gcc and it seems reasonable to
- assume that if gcc is used by an application it will have been used to
- build libgmp so we'll have the right setting. Also in reality L or .L
- are the candidates, and there's a good chance both will work anyway. */
-
+/* Instantiated by configure, for internal use only. */
#if ! __GMP_WITHIN_CONFIGURE
#define __GMP_BITS_PER_MP_LIMB @__GMP_BITS_PER_MP_LIMB@
-#define __GMP_LSYM_PREFIX @__GMP_LSYM_PREFIX@
#endif
@@ -1372,342 +1364,110 @@ mpf_size (mpf_srcptr f)
/**************** mpn inlines ****************/
-/* __GMP_ASM_L gives a local label for a gcc asm block, for use when
- temporary local labels like "1:" might not be available, which is the
- case for instance on the x86s (the SCO assembler doesn't support them).
-
- The label generated is made unique by including "%=" which is a unique
- number for each insn. This ensures the same name can be used in multiple
- asm blocks, perhaps via a macro. Since jumps between asm blocks are not
- allowed there's no need for a label to be usable outside a single
- block. */
-
-#define __GMP_ASM_L(name) __GMP_LSYM_PREFIX "asm_%=_" #name
-
-
-/* The following x86 stuff gives better flags handling than the generic C,
- usually saving a register, some code size and a cycle or two. The
- emphasis is on something compact and sensible.
-
- The src!=dst / n!=1 case has separate code for add and sub since the
- former can merge cout and n and use a load/add to save one instruction.
- Whether that's significant overall is debatable.
-
- The sbbl to establish cout should be reasonable on all x86s. If it's
- about to be added to something then we miss the opportunity to do "adcl
- $0, var", but that's inevitable since the carry flag can't be an output.
+/* The comments with __GMPN_ADD_1 below apply here too.
- This code isn't used for egcs 2.91 since __GMPN_ADD_1 tickles some
- reloader bugs (seen in 2.91.66 19990314 on redhat 6.1).
+ The test for FUNCTION returning 0 should predict well. If it's assumed
+ {yp,ysize} will usually have a random number of bits then the high limb
+ won't be full and a carry out will occur a good deal less than 50% of the
+ time.
- Possibilities:
+ ysize==0 isn't a documented feature, but is used internally in a few
+ places.
- An alternative to sbbl would be setc and let gcc convert QI to SI if
- necessary, but on P54 that's a cycle slower, and on P6 it's not clear
- whether gcc 2.95.x properly understands partial register stalls.
+ Producing cout last stops it using up a register during the main part of
+ the calculation, though gcc (as of 3.0) on an "if (mpn_add (...))"
+ doesn't seem able to move the true and false legs of the conditional up
+ to the two places cout is generated. */
- On chips with a good clc, that could be used and the loops driven with a
- single jnbe instead of two jumps, to perhaps save a BTB entry. The final
- cout=-cout would become cout++. */
-
-#if defined (__GNUC__) && ! (__GNUC__ == 2 && __GNUC_MINOR__ == 91) \
- && (defined (__i386__) || defined (__i486__)) \
- && __GMP_BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM)
-
-#define __GMPN_AORS_1_INPLACE(cout, ptr, size, n, aors) \
- do { \
- mp_ptr __dummy1; \
- mp_size_t __dummy2; \
- \
- if (__builtin_constant_p (n) && (n) == 1) \
- { \
- __asm__ __volatile__ \
- (__GMP_ASM_L(top) ":\n" \
- aors " $1, (%1)\n" \
- " jnc " __GMP_ASM_L(done) "\n" \
- " leal 4(%1), %1\n" \
- " decl %2\n" \
- " jnz " __GMP_ASM_L(top) "\n" \
- __GMP_ASM_L(done) ":\n" \
- " sbbl %0, %0\n" \
- : "=r" (cout), \
- "=&r" (__dummy1), \
- "=&rm" (__dummy2) \
- : "1" (ptr), \
- "2" (size) \
- : "memory"); \
- } \
- else \
- { \
- __asm__ __volatile__ \
- ( aors " %5, (%1)\n" \
- " jnc " __GMP_ASM_L(done) "\n" \
- __GMP_ASM_L(top) ":\n" \
- " leal 4(%1), %1\n" \
- " decl %2\n" \
- " jz " __GMP_ASM_L(done) "\n" \
- aors " $1, (%1)\n" \
- " jc " __GMP_ASM_L(top) "\n" \
- __GMP_ASM_L(done) ":\n" \
- " sbbl %0, %0\n" \
- : "=r" (cout), \
- "=&r" (__dummy1), \
- "=&rm" (__dummy2) \
- : "1" (ptr), \
- "2" (size), \
- "ri" (n) \
- : "memory"); \
- } \
- (cout) = -(cout); \
- \
- } while (0)
-
-#define __GMPN_AORS_1_GENERAL_ONE(cout, dst, src, size, aors) \
- do { \
- mp_ptr __dst; \
- mp_srcptr __src; \
- mp_size_t __size; \
- \
- __asm__ __volatile__ \
- (__GMP_ASM_L(top) ":\n" \
- " movl (%2), %0\n" \
- " addl $4, %2\n" \
- aors " $1, %0\n" \
- " movl %0, (%1)\n" \
- " leal 4(%1), %1\n" \
- " decl %3\n" \
- " jz " __GMP_ASM_L(done) "\n" \
- " jc " __GMP_ASM_L(top) "\n" \
- __GMP_ASM_L(done) ":\n" \
- " sbbl %0, %0\n" \
- : "=&r" (cout), \
- "=&r" (__dst), \
- "=&r" (__src), \
- "=&rm" (__size) \
- : "1" (dst), \
- "2" (src), \
- "3" (size) \
- : "memory"); \
- \
- (cout) = -(cout); \
- if (__src != __dst) \
- __GMPN_COPY (__dst, __src, __size); \
- \
- } while (0)
-
-#define __GMPN_ADD_1(cout, dst, src, size, n) \
+#define __GMPN_AORS(cout, wp, xp, xsize, yp, ysize, FUNCTION, TEST) \
do { \
- /* ASSERT ((size) >= 1); */ \
- /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \
- \
- if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \
- { \
- __GMPN_AORS_1_INPLACE (cout, dst, size, n, "addl"); \
- } \
- else \
- { \
- if (__builtin_constant_p (n) && (n) == 1) \
- { \
- __GMPN_AORS_1_GENERAL_ONE (cout, dst, src, size, "addl"); \
- } \
- else \
- { \
- mp_ptr __dst; \
- mp_srcptr __src; \
- mp_size_t __size; \
- \
- __asm__ __volatile__ \
- (__GMP_ASM_L(top) ":\n" \
- " addl (%2), %0\n" \
- " leal 4(%2), %2\n" \
- " movl %0, (%1)\n" \
- " leal 4(%1), %1\n" \
- " movl $1, %0\n" \
- " decl %3\n" \
- " jz " __GMP_ASM_L(done) "\n" \
- " jc " __GMP_ASM_L(top) "\n" \
- __GMP_ASM_L(done) ":\n" \
- " sbbl %0, %0\n" \
- : "=&r" (cout), \
- "=&r" (__dst), \
- "=&r" (__src), \
- "=&rm" (__size) \
- : "1" (dst), \
- "2" (src), \
- "3" (size), \
- "0" (n) \
- : "memory"); \
+ mp_size_t __i; \
+ mp_limb_t __x; \
\
- (cout) = -(cout); \
- if (__src != __dst) \
- __GMPN_COPY (__dst, __src, __size); \
- } \
- } \
- } while (0)
-
-#define __GMPN_SUB_1(cout, dst, src, size, n) \
- do { \
- /* ASSERT ((size) >= 1); */ \
- /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \
+ /* ASSERT ((ysize) >= 0); */ \
+ /* ASSERT ((xsize) >= (ysize)); */ \
+ /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, xp, xsize)); */ \
+ /* ASSERT (MPN_SAME_OR_SEPARATE2_P (wp, xsize, yp, ysize)); */ \
\
- if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \
+ __i = (ysize); \
+ if (__i != 0) \
{ \
- __GMPN_AORS_1_INPLACE (cout, dst, size, n, "subl"); \
- } \
- else \
- { \
- if (__builtin_constant_p (n) && (n) == 1) \
- { \
- __GMPN_AORS_1_GENERAL_ONE (cout, dst, src, size, "subl"); \
- } \
- else \
+ if (FUNCTION (wp, xp, yp, __i)) \
{ \
- mp_ptr __dst; \
- mp_srcptr __src; \
- mp_size_t __size; \
- mp_limb_t __dummy; \
- \
- __asm__ __volatile__ \
- (__GMP_ASM_L(top) ":\n" \
- " movl (%2), %0\n" \
- " addl $4, %2\n" \
- " subl %4, %0\n" \
- " movl %0, (%1)\n" \
- " leal 4(%1), %1\n" \
- " movl $1, %4\n" \
- " decl %3\n" \
- " jz " __GMP_ASM_L(done) "\n" \
- " jc " __GMP_ASM_L(top) "\n" \
- __GMP_ASM_L(done) ":\n" \
- " sbbl %0, %0\n" \
- : "=&r" (cout), \
- "=&r" (__dst), \
- "=&r" (__src), \
- "=&rm" (__size), \
- "=&rm" (__dummy) \
- : "1" (dst), \
- "2" (src), \
- "3" (size), \
- "4" (n) \
- : "memory"); \
- \
- (cout) = -(cout); \
- if (__src != __dst) \
- __GMPN_COPY (__dst, __src, __size); \
+ do \
+ { \
+ if (__i >= (xsize)) \
+ { \
+ (cout) = 1; \
+ goto __done; \
+ } \
+ __x = (xp)[__i]; \
+ } \
+ while (TEST); \
} \
} \
+ if ((wp) != (xp)) \
+ __GMPN_COPY_REST (wp, xp, xsize, __i); \
+ (cout) = 0; \
+ __done: \
+ ; \
} while (0)
-#endif
+#define __GMPN_ADD(cout, wp, xp, xsize, yp, ysize) \
+ __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_add_n, \
+ (((wp)[__i++] = __x + 1) == 0))
+#define __GMPN_SUB(cout, wp, xp, xsize, yp, ysize) \
+ __GMPN_AORS (cout, wp, xp, xsize, yp, ysize, mpn_sub_n, \
+ (((wp)[__i++] = __x - 1), __x == 0))
-/* The special cases here for src==dst known at compile time are because gcc
- (as of 2.95.4) doesn't recognise __src and __dst are identical when
- initialized from the same expression, perhaps because they get
- incremented (despite being incremented together). The main saving from
- this is not needing the __GMPN_COPY code.
- The use of __n is designed to keep the code size down. On the second and
- subsequent limbs updated it's only a carry of 1 being propagated and that
- could be separate code, but it seems better to share one piece of
- load/add/store/test since on random data only 1 or 2 limbs normally need
- to be touched.
+/* The use of __i indexing is designed to ensure a compile time src==dst
+ remains nice and clear to the compiler, so that __GMPN_COPY_REST can
+ disappear, and the load/add/store gets a chance to become a
+ read-modify-write on CISC CPUs.
- For constant n==1, __n optimizes down to a constant 1, which saves a few
- bytes of code.
+ The use of __n is designed to keep code size down by sharing the
+ load/add/store for the first limb and subsequent carry propagation.
+ There's also a good chance a compile time n==1 input will be recognised
+ as making __n a constant 1, certainly gcc recognises this.
- In __GMPN_SUB_1, constant n==1 might be better off with the "no-borrow"
- test as "(dst = src - 1) != MP_LIMB_T_MAX" rather than the current "(dst
- = src - 1) <= src", but the difference between the two ought to be
- minimal, and in any case ideally gcc would recognise both as checking
- unsigned underflow. */
+ Alternatives:
-#if defined (__GNUC__) && ! defined (__GMPN_ADD_1)
-#define __GMPN_AORS_1(cout, dst, src, size, n, TEST) \
- do { \
- mp_ptr __dst = (dst); \
- mp_size_t __size = (size); \
- mp_limb_t __n = (n); \
- mp_limb_t __x; \
- \
- /* ASSERT ((size) >= 1); */ \
- /* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \
- \
- if (__builtin_constant_p ((dst) == (src)) && (dst) == (src)) \
- { \
- (cout) = 0; \
- for (;;) \
- { \
- __x = *__dst; \
- if (TEST) \
- break; \
- __n = 1; \
- if (--__size == 0) \
- { \
- (cout) = 1; \
- break; \
- } \
- } \
- } \
- else \
- { \
- mp_srcptr __src = (src); \
- (cout) = 0; \
- for (;;) \
- { \
- __size--; \
- __x = *__src++; \
- if (TEST) \
- { \
- if (__dst != __src) \
- __GMPN_COPY (__dst, __src, __size); \
- break; \
- } \
- __n = 1; \
- if (__size == 0) \
- { \
- (cout) = 1; \
- break; \
- } \
- } \
- } \
- } while (0)
+ Using a pair of pointers instead of indexing would be possible, but gcc
+ isn't able to recognise compile-time src==dst in that case, even when the
+ pointers are incremented more or less together. Other compilers would
+ very likely have similar difficulty.
-#define __GMPN_ADD_1(cout, dst, src, size, n) \
- __GMPN_AORS_1 (cout, dst, src, size, n, (*__dst++ = __x + __n) >= __n)
-#define __GMPN_SUB_1(cout, dst, src, size, n) \
- __GMPN_AORS_1 (cout, dst, src, size, n, (*__dst++ = __x - __n) <= __x)
-#endif
+ gcc could use "if (__builtin_constant_p(src==dst) && src==dst)" or
+ similar to detect a compile-time src==dst. This works nicely on gcc
+ 2.95.x, it's not good on gcc 3.0 where __builtin_constant_p(p==p) seems
+ to be always false, for a pointer p. But the current code form seems
+ good enough for src==dst anyway.
+ gcc on x86 as usual doesn't give particularly good flags handling for the
+ carry/borrow detection. It's tempting to want some multi instruction asm
+ blocks to help it, and this was tried, but in truth there's only a few
+ instructions to save and any gain is all too easily lost by register
+ juggling setting up for the asm. */
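
As an illustration of the compile-time src==dst test discussed above
(the macro name below is hypothetical, not part of gmp.h; the idiom is
the one used by the removed gcc-specific code):

  /* Hypothetical helper for illustration only: non-zero when gcc can
     see at compile time that dst and src are the same pointer.  Works
     with gcc 2.95.x; under gcc 3.0 __builtin_constant_p (p == p) seems
     to be always false for a pointer p, so this evaluates to 0 there. */
  #define __GMP_SRC_EQ_DST_P(dst, src) \
    (__builtin_constant_p ((dst) == (src)) && (dst) == (src))
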
-/* The following is designed to optimize down on non-gcc compilers. The use
- of __i ensures a compile time src==dst remains nice and clear, in
- particular the __GMPN_COPY will disappear, and the load/add/store gets a
- chance to become a read/modify/write on CISC CPUs. The use of __n is as
- per the gcc code above and should be recognised as a constant 1 for a
- constant n==1. */
-
-#ifndef __GMPN_ADD_1
#define __GMPN_AORS_1(cout, dst, src, size, n, TEST) \
do { \
mp_size_t __i; \
- mp_limb_t __n = (n); \
- mp_limb_t __x; \
+ mp_limb_t __n, __x; \
\
/* ASSERT ((size) >= 1); */ \
/* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \
\
(cout) = 0; \
__i = 0; \
+ __n = (n); \
for (;;) \
{ \
__x = (src)[__i]; \
if (TEST) \
{ \
if ((src) != (dst)) \
- { \
- __i++; \
- __GMPN_COPY ((dst)+__i, (src)+__i, (size)-__i); \
- } \
+ __GMPN_COPY_REST (dst, src, size, __i+1); \
break; \
} \
__n = 1; \
@@ -1726,7 +1486,6 @@ mpf_size (mpf_srcptr f)
#define __GMPN_SUB_1(cout, dst, src, size, n) \
__GMPN_AORS_1(cout, dst, src, size, n, \
((dst)[__i] = __x - __n) <= __x)
-#endif
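
A small usage sketch of the __GMPN_ADD_1 carry convention (the function
and array below are illustrative only, not from the patch):

  static mp_limb_t
  add_1_example (void)
  {
    mp_limb_t limbs[4] = { ~ (mp_limb_t) 0, ~ (mp_limb_t) 0, 5, 0 };
    mp_limb_t carry;

    /* limbs += 1: the two low limbs wrap to 0, the carry is absorbed
       by the third limb (5 becomes 6), the top limb is untouched, and
       the carry out is 0.  It would be 1 only if all four limbs had
       been ~0.  */
    __GMPN_ADD_1 (carry, limbs, limbs, (mp_size_t) 4, (mp_limb_t) 1);
    return carry;
  }
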
/* Compare {xp,size} and {yp,size}, setting "result" to positive, zero or
@@ -1793,21 +1552,53 @@ mpf_size (mpf_srcptr f)
} while (0)
#endif
-/* Enhancement: Use some of the smarter code from gmp-impl.h. Maybe use
- mpn_copyi if there's a native version, and if we don't mind demanding
- binary compatibility for it (on targets which use it). */
+#if defined (__GMPN_COPY) && ! defined (__GMPN_COPY_REST)
+#define __GMPN_COPY_REST(dst, src, size, start) \
+ do { \
+ /* ASSERT ((start) >= 0); */ \
+ /* ASSERT ((start) <= (size)); */ \
+ __GMPN_COPY ((dst)+(start), (src)+(start), (size)-(start)); \
+ } while (0)
+#endif
-#ifndef __GMPN_COPY
-#define __GMPN_COPY(dst, src, size) \
+/* Copy {src,size} to {dst,size}, starting at "start". This is designed to
+ keep the indexing dst[j] and src[j] nice and simple for __GMPN_ADD_1,
+ __GMPN_ADD, etc. */
+#if ! defined (__GMPN_COPY_REST)
+#define __GMPN_COPY_REST(dst, src, size, start) \
do { \
mp_size_t __j; \
/* ASSERT ((size) >= 0); */ \
+ /* ASSERT ((start) >= 0); */ \
+ /* ASSERT ((start) <= (size)); */ \
/* ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); */ \
- for (__j = 0; __j < (size); __j++) \
+ for (__j = (start); __j < (size); __j++) \
(dst)[__j] = (src)[__j]; \
} while (0)
#endif
+/* Enhancement: Use some of the smarter code from gmp-impl.h. Maybe use
+ mpn_copyi if there's a native version, and if we don't mind demanding
+ binary compatibility for it (on targets which use it). */
+
+#if ! defined (__GMPN_COPY)
+#define __GMPN_COPY(dst, src, size) __GMPN_COPY_REST (dst, src, size, 0)
+#endif
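
A brief sketch of how __GMPN_COPY_REST is meant to be called (the
function name and the fixed start index are illustrative only): once the
add/sub loop has written the low limbs up to some index, the untouched
high limbs are copied across.

  static void
  copy_rest_example (mp_ptr dst, mp_srcptr src, mp_size_t size)
  {
    /* Suppose a carry loop has already written dst[0] and dst[1]: the
       limbs 2 .. size-1 are unchanged and are simply copied (a no-op
       when dst == src).  */
    if (dst != src)
      __GMPN_COPY_REST (dst, src, size, (mp_size_t) 2);
  }
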
+
+
+#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add
+#if ! __GMP_FORCE_mpn_add
+__GMP_EXTERN_INLINE
+#endif
+mp_limb_t
+mpn_add (mp_ptr wp,
+ mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
+{
+ mp_limb_t c;
+ __GMPN_ADD (c, wp, xp, xsize, yp, ysize);
+ return c;
+}
+#endif
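
A usage sketch for the new mpn_add inline (operand values and names are
illustrative, not from the patch): the shorter operand {yp,ysize} is
added into the longer {xp,xsize}, xsize result limbs are written to wp,
and the carry out of the top limb is returned.

  static mp_limb_t
  mpn_add_example (void)
  {
    mp_limb_t x[3] = { 1, 2, 3 };   /* the longer operand, 3 limbs */
    mp_limb_t y[1] = { 7 };         /* the shorter operand, 1 limb  */
    mp_limb_t w[3];

    /* w{3} = x{3} + y{1}; here w becomes {8, 2, 3} and the return is
       0.  It would be 1 only on overflow out of the top limb of x.  */
    return mpn_add (w, x, (mp_size_t) 3, y, (mp_size_t) 1);
  }
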
#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add_1
#if ! __GMP_FORCE_mpn_add_1
@@ -1835,71 +1626,33 @@ mpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
}
#endif
-#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub_1
-#if ! __GMP_FORCE_mpn_sub_1
+#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub
+#if ! __GMP_FORCE_mpn_sub
__GMP_EXTERN_INLINE
#endif
mp_limb_t
-mpn_sub_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t n)
+mpn_sub (mp_ptr wp,
+ mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
mp_limb_t c;
- __GMPN_SUB_1 (c, dst, src, size, n);
+ __GMPN_SUB (c, wp, xp, xsize, yp, ysize);
return c;
}
#endif
-
-#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_add
-#if ! __GMP_FORCE_mpn_add
+#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub_1
+#if ! __GMP_FORCE_mpn_sub_1
__GMP_EXTERN_INLINE
#endif
mp_limb_t
-mpn_add (register mp_ptr res_ptr,
- register mp_srcptr s1_ptr,
- register mp_size_t s1_size,
- register mp_srcptr s2_ptr,
- register mp_size_t s2_size)
+mpn_sub_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t n)
{
- mp_limb_t cy_limb = 0;
-
- if (s2_size != 0)
- cy_limb = mpn_add_n (res_ptr, s1_ptr, s2_ptr, s2_size);
-
- if (s1_size - s2_size != 0)
- __GMPN_ADD_1 (cy_limb,
- res_ptr + s2_size,
- s1_ptr + s2_size,
- s1_size - s2_size,
- cy_limb);
- return cy_limb;
+ mp_limb_t c;
+ __GMPN_SUB_1 (c, dst, src, size, n);
+ return c;
}
#endif
-#if defined (__GMP_EXTERN_INLINE) || __GMP_FORCE_mpn_sub
-#if ! __GMP_FORCE_mpn_sub
-__GMP_EXTERN_INLINE
-#endif
-mp_limb_t
-mpn_sub (register mp_ptr res_ptr,
- register mp_srcptr s1_ptr,
- register mp_size_t s1_size,
- register mp_srcptr s2_ptr,
- register mp_size_t s2_size)
-{
- mp_limb_t cy_limb = 0;
-
- if (s2_size != 0)
- cy_limb = mpn_sub_n (res_ptr, s1_ptr, s2_ptr, s2_size);
-
- if (s1_size - s2_size != 0)
- __GMPN_SUB_1 (cy_limb,
- res_ptr + s2_size,
- s1_ptr + s2_size,
- s1_size - s2_size,
- cy_limb);
- return cy_limb;
-}
-#endif
#if defined (__cplusplus)