summaryrefslogtreecommitdiff
path: root/mul.c
diff options
context:
space:
mode:
authorpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-08 14:08:45 +0000
committerpelissip <pelissip@280ebfd0-de03-0410-8827-d642c229c3f4>2004-12-08 14:08:45 +0000
commita824f7ad7bc1fe409778f39947af2914f36f0cb6 (patch)
tree3bf05a53776f526ca2f80a6380956ed49db09325 /mul.c
parentba5d77b6a9e5019e745dfaf8d5d4b6265449ac69 (diff)
downloadmpfr-a824f7ad7bc1fe409778f39947af2914f36f0cb6.tar.gz
Optimize mpfr_mul by inlining and rewriting the rounding.
It seems that GCC option `-frename-registers` for mpfr_mul / Athlon XP improves its performance (But it decreases it on Pentium4)... git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@3120 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'mul.c')
-rw-r--r--mul.c134
1 files changed, 61 insertions, 73 deletions
diff --git a/mul.c b/mul.c
index a52493e26..39609012f 100644
--- a/mul.c
+++ b/mul.c
@@ -24,101 +24,97 @@ MA 02111-1307, USA. */
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
- int sign_product, cc, inexact;
- mp_exp_t ax;
+ int sign, inexact;
+ mp_exp_t ax, ax2;
mp_limb_t *tmp;
mp_limb_t b1;
mp_prec_t bq, cq;
mp_size_t bn, cn, tn, k;
- TMP_DECL(marker);
+ TMP_DECL (marker);
/* deal with special cases */
- if (MPFR_ARE_SINGULAR(b,c))
+ if (MPFR_ARE_SINGULAR (b, c))
{
- if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
+ if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
{
- MPFR_SET_NAN(a);
+ MPFR_SET_NAN (a);
MPFR_RET_NAN;
}
- sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
- if (MPFR_IS_INF(b))
+ sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
+ if (MPFR_IS_INF (b))
{
- if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
+ if (!MPFR_IS_ZERO (c))
{
- MPFR_SET_SIGN(a,sign_product);
- MPFR_SET_INF(a);
- MPFR_RET(0); /* exact */
+ MPFR_SET_SIGN (a, sign);
+ MPFR_SET_INF (a);
+ MPFR_RET (0);
}
else
{
- MPFR_SET_NAN(a);
+ MPFR_SET_NAN (a);
MPFR_RET_NAN;
}
}
- else if (MPFR_IS_INF(c))
+ else if (MPFR_IS_INF (c))
{
- if (MPFR_NOTZERO(b))
+ if (!MPFR_IS_ZERO (b))
{
- MPFR_SET_SIGN(a, sign_product);
- MPFR_SET_INF(a);
- MPFR_RET(0); /* exact */
+ MPFR_SET_SIGN (a, sign);
+ MPFR_SET_INF (a);
+ MPFR_RET(0);
}
else
{
- MPFR_SET_NAN(a);
+ MPFR_SET_NAN (a);
MPFR_RET_NAN;
}
}
else
{
- MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
- MPFR_SET_SIGN(a, sign_product);
- MPFR_SET_ZERO(a);
- MPFR_RET(0); /* 0 * 0 is exact */
+ MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
+ MPFR_SET_SIGN (a, sign);
+ MPFR_SET_ZERO (a);
+ MPFR_RET (0);
}
}
- MPFR_CLEAR_FLAGS(a);
- sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
+ MPFR_CLEAR_FLAGS (a);
+ sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
/* Note: the exponent of the exact result will be e = bx + cx + ec with
ec in {-1,0,1} and the following assumes that e is representable. */
- /* These ASSERT should be always true */
- MPFR_ASSERTN(MPFR_EMAX_MAX <= (MPFR_EXP_MAX >> 1));
- MPFR_ASSERTN(MPFR_EMIN_MIN >= -(MPFR_EXP_MAX >> 1));
/* FIXME: Usefull since we do an exponent check after ?
* It is usefull iff the precision is big, there is an overflow
* and we are doing further mults...*/
#ifdef HUGE
- if (MPFR_UNLIKELY(ax > __gmpfr_emax + 1))
- return mpfr_set_overflow (a, rnd_mode, sign_product);
- if (MPFR_UNLIKELY(ax < __gmpfr_emin - 2))
+ if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
+ return mpfr_set_overflow (a, rnd_mode, sign);
+ if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
return mpfr_set_underflow (a, rnd_mode == GMP_RNDN ? GMP_RNDZ : rnd_mode,
- sign_product);
+ sign);
#endif
- bq = MPFR_PREC(b);
- cq = MPFR_PREC(c);
+ bq = MPFR_PREC (b);
+ cq = MPFR_PREC (c);
- MPFR_ASSERTD(bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */
+ MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is /2 so no integer overflow */
bn = (bq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of b */
cn = (cq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of c */
k = bn + cn; /* effective nb of limbs used by b*c (= tn or tn+1) below */
tn = (bq + cq + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
- /* <= k, thus no int overflow */
- MPFR_ASSERTD(tn <= k);
+ MPFR_ASSERTD (tn <= k); /* tn <= k, thus no int overflow */
/* Check for no size_t overflow*/
- MPFR_ASSERTD((size_t) k <= ((size_t) ~0) / BYTES_PER_MP_LIMB);
- TMP_MARK(marker);
- tmp = (mp_limb_t *) TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);
+ MPFR_ASSERTD ((size_t) k <= ((size_t) ~0) / BYTES_PER_MP_LIMB);
+ TMP_MARK (marker);
+ tmp = (mp_limb_t *) TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);
/* multiplies two mantissa in temporary allocated space */
- b1 = (MPFR_LIKELY(bn >= cn)) ?
- mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
- : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);
+ b1 = MPFR_LIKELY (bn >= cn)
+ ? mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn)
+ : mpn_mul (tmp, MPFR_MANT (c), cn, MPFR_MANT (b), bn);
/* now tmp[0]..tmp[k-1] contains the product of both mantissa,
with tmp[k-1]>=2^(BITS_PER_MP_LIMB-2) */
@@ -128,36 +124,28 @@ mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
tmp += k - tn;
- if (MPFR_UNLIKELY(b1 == 0))
+ if (MPFR_UNLIKELY (b1 == 0))
mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
- cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
- MPFR_IS_NEG_SIGN(sign_product),
- MPFR_PREC (a), rnd_mode, &inexact);
-
- /* cc = 1 ==> result is a power of two */
- if (MPFR_UNLIKELY(cc))
- MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;
-
- TMP_FREE(marker);
-
- {
- mp_exp_t ax2 = ax + (mp_exp_t) (b1 - 1 + cc);
- if (MPFR_UNLIKELY( ax2 > __gmpfr_emax))
- return mpfr_set_overflow (a, rnd_mode, sign_product);
- if (MPFR_UNLIKELY( ax2 < __gmpfr_emin))
- {
- /* In the rounding to the nearest mode, if the exponent of the exact
- result (i.e. before rounding, i.e. without taking cc into account)
- is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
- both arguments are powers of 2), then round to zero. */
- if (rnd_mode == GMP_RNDN &&
- (ax + (mp_exp_t) b1 < __gmpfr_emin ||
- (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
- rnd_mode = GMP_RNDZ;
- return mpfr_set_underflow (a, rnd_mode, sign_product);
- }
- MPFR_SET_EXP (a, ax2);
- MPFR_SET_SIGN(a, sign_product);
- }
+
+ ax2 = ax + (mp_exp_t) (b1 - 1);
+ MPFR_RNDRAW (inexact, a, tmp, bq+cq, rnd_mode, sign, ax2++);
+ TMP_FREE (marker);
+ MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Exponent may be out of range */
+ MPFR_SET_SIGN (a, sign);
+ if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
+ return mpfr_set_overflow (a, rnd_mode, sign);
+ if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
+ {
+ /* In the rounding to the nearest mode, if the exponent of the exact
+ result (i.e. before rounding, i.e. without taking cc into account)
+ is < __gmpfr_emin - 1 or the exact result is a power of 2 (i.e. if
+ both arguments are powers of 2), then round to zero. */
+ if (rnd_mode == GMP_RNDN
+ && (ax + (mp_exp_t) b1 < __gmpfr_emin
+ || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
+ rnd_mode = GMP_RNDZ;
+ return mpfr_set_underflow (a, rnd_mode, sign);
+ }
return inexact;
}
+