From 90d49d0e9f56b127a8f83d947584e6dee8804619 Mon Sep 17 00:00:00 2001
From: zimmerma
Date: Thu, 23 Jan 2014 21:27:33 +0000
Subject: improved version of mpfr_fmma, before we had for MPCbench:

score for mul       : 178271
score for add       : 2871184
score for sub       : 2699846
score for div       : 70527
group score Arith   : 558739
score for sqrt      : 93427
score for exp       : 9422
score for log       : 4734
score for cos       : 8291
score for sin       : 8308
score for acos      : 2846
score for asin      : 2724
group score Special : 8068
global score        : 37672

Now:

score for mul       : 196286
score for add       : 2865480
score for sub       : 2665855
score for div       : 72851
group score Arith   : 574896
score for sqrt      : 93204
score for exp       : 9432
score for log       : 4720
score for cos       : 8304
score for sin       : 8327
score for acos      : 2849
score for asin      : 2731
group score Special : 8071
global score        : 38075

git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1439 211d60ee-9f03-0410-a15a-8952a2c7a4e4
---
 src/mul.c | 90 +++++++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 26 deletions(-)

diff --git a/src/mul.c b/src/mul.c
index 73c581d..3c9c0a7 100644
--- a/src/mul.c
+++ b/src/mul.c
@@ -170,6 +170,10 @@ mul_imag (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd)
    return MPC_INEX (inex_re, inex_im);
 }
 
+#define MPFR_MANT(x) ((x)->_mpfr_d)
+#define MPFR_PREC(x) ((x)->_mpfr_prec)
+#define MPFR_EXP(x) ((x)->_mpfr_exp)
+#define MPFR_LIMB_SIZE(x) ((MPFR_PREC (x) - 1) / GMP_NUMB_BITS + 1)
 
 static int
 mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
@@ -183,33 +187,68 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
    int inex;
    mpfr_t u, v;
+   mp_size_t an, bn, cn, dn;
 
    /* u=a*b, v=sign*c*d exactly */
-   mpfr_init2 (u, mpfr_get_prec (a) + mpfr_get_prec (b));
-   mpfr_init2 (v, mpfr_get_prec (c) + mpfr_get_prec (d));
-   mpfr_mul (u, a, b, MPFR_RNDN);
-   mpfr_mul (v, c, d, MPFR_RNDN);
-   if (sign < 0)
-      mpfr_neg (v, v, MPFR_RNDN);
+   an = MPFR_LIMB_SIZE(a);
+   bn = MPFR_LIMB_SIZE(b);
+   cn = MPFR_LIMB_SIZE(c);
+   dn = MPFR_LIMB_SIZE(d);
+   MPFR_MANT(u) = malloc ((an + bn) * sizeof (mp_limb_t));
+   MPFR_MANT(v) = malloc ((cn + dn) * sizeof (mp_limb_t));
+   if (an >= bn)
+     mpn_mul (MPFR_MANT(u), MPFR_MANT(a), an, MPFR_MANT(b), bn);
+   else
+     mpn_mul (MPFR_MANT(u), MPFR_MANT(b), bn, MPFR_MANT(a), an);
+   if ((MPFR_MANT(u)[an + bn - 1] >> (GMP_NUMB_BITS - 1)) == 0)
+     {
+       mpn_lshift (MPFR_MANT(u), MPFR_MANT(u), an + bn, 1);
+       MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b) - 1;
+     }
+   else
+     MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b);
+   if (cn >= dn)
+     mpn_mul (MPFR_MANT(v), MPFR_MANT(c), cn, MPFR_MANT(d), dn);
+   else
+     mpn_mul (MPFR_MANT(v), MPFR_MANT(d), dn, MPFR_MANT(c), cn);
+   if ((MPFR_MANT(v)[cn + dn - 1] >> (GMP_NUMB_BITS - 1)) == 0)
+     {
+       mpn_lshift (MPFR_MANT(v), MPFR_MANT(v), cn + dn, 1);
+       MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d) - 1;
+     }
+   else
+     MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d);
+   MPFR_PREC(u) = (an + bn) * GMP_NUMB_BITS;
+   MPFR_PREC(v) = (cn + dn) * GMP_NUMB_BITS;
+   MPFR_SIGN(u) = MPFR_SIGN(a) * MPFR_SIGN(b);
+   if (sign > 0)
+     MPFR_SIGN(v) = MPFR_SIGN(c) * MPFR_SIGN(d);
+   else
+     MPFR_SIGN(v) = -MPFR_SIGN(c) * MPFR_SIGN(d);
+
+   mpfr_check_range (u, 0, MPFR_RNDN);
+   mpfr_check_range (v, 0, MPFR_RNDN);
 
    /* tentatively compute z as u+v; here we need z to be distinct
       from a, b, c, d to not lose the latter */
    inex = mpfr_add (z, u, v, rnd);
-
-   if (mpfr_inf_p (z)) {
-      /* replace by "correctly rounded overflow" */
-      mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN);
-      inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
-   }
-   else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
-      /* exactly u underflowed, determine inexact flag */
-      inex = (mpfr_signbit (u) ? 1 : -1);
-   }
-   else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
-      /* exactly v underflowed, determine inexact flag */
-      inex = (mpfr_signbit (v) ? 1 : -1);
-   }
-   else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
+
+   if (!mpfr_regular_p(z) || !mpfr_regular_p(u) || !mpfr_regular_p(v))
+     {
+      if (mpfr_inf_p (z)) {
+         /* replace by "correctly rounded overflow" */
+         mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN);
+         inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
+      }
+      else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
+         /* exactly u underflowed, determine inexact flag */
+         inex = (mpfr_signbit (u) ? 1 : -1);
+      }
+      else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
+         /* exactly v underflowed, determine inexact flag */
+         inex = (mpfr_signbit (v) ? 1 : -1);
+      }
+      else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
       /* In the first case, u and v are infinities with opposite signs.
         In the second case, u and v are zeroes; their sum may be 0 or the
         least representable number, with a sign to be determined.
@@ -312,15 +351,14 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
       mpfr_set_exp ((mpfr_ptr) c, ec);
       mpfr_set_exp ((mpfr_ptr) d, ed);
       /* works also when some of a, b, c, d are not all distinct */
-   }
-
-   mpfr_clear (u);
-   mpfr_clear (v);
+   }
+   }
+   free (MPFR_MANT(u));
+   free (MPFR_MANT(v));
 
    return inex;
 }
 
-
 int
 mpc_mul_naive (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd)
 {
-- 
cgit v1.2.1
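
For readers skimming the diff, here is a minimal, self-contained sketch of the semantics mpfr_fmma preserves, namely z = a*b + sign*c*d with a single rounding, written only against the public MPFR API in the style of the pre-patch code (exact products in wide temporaries, then one rounded addition). It is not part of the patch: the helper name fmma_reference and the values in main are illustrative, and it deliberately omits the overflow/underflow recovery and the mpn-level optimization that this commit introduces.

#include <stdio.h>
#include <mpfr.h>

/* Reference semantics: z = a*b + sign*c*d with a single rounding.
   The products are exact because u and v carry prec(a)+prec(b) and
   prec(c)+prec(d) bits; only the final mpfr_add rounds. */
static int
fmma_reference (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b,
                mpfr_srcptr c, mpfr_srcptr d, int sign, mpfr_rnd_t rnd)
{
  mpfr_t u, v;
  int inex;

  mpfr_init2 (u, mpfr_get_prec (a) + mpfr_get_prec (b));
  mpfr_init2 (v, mpfr_get_prec (c) + mpfr_get_prec (d));
  mpfr_mul (u, a, b, MPFR_RNDN);   /* exact */
  mpfr_mul (v, c, d, MPFR_RNDN);   /* exact */
  if (sign < 0)
    mpfr_neg (v, v, MPFR_RNDN);    /* exact */
  inex = mpfr_add (z, u, v, rnd);  /* the only rounding step */
  mpfr_clear (u);
  mpfr_clear (v);
  return inex;
}

int
main (void)
{
  mpfr_t a, b, c, d, z;

  mpfr_inits2 (53, a, b, c, d, z, (mpfr_ptr) 0);
  mpfr_set_d (a, 1.5,  MPFR_RNDN);
  mpfr_set_d (b, 2.25, MPFR_RNDN);
  mpfr_set_d (c, 0.75, MPFR_RNDN);
  mpfr_set_d (d, 4.0,  MPFR_RNDN);
  fmma_reference (z, a, b, c, d, -1, MPFR_RNDN);  /* 1.5*2.25 - 0.75*4 */
  mpfr_printf ("a*b - c*d = %Rg\n", z);           /* prints 0.375 */
  mpfr_clears (a, b, c, d, z, (mpfr_ptr) 0);
  return 0;
}

Build with, e.g., cc fmma_ref.c -lmpfr -lgmp. The patched function obtains the same exact u and v directly with mpn_mul on the mantissa limbs (normalizing with mpn_lshift and adjusting the exponent when the top limb has its high bit clear), which avoids the mpfr_init2/mpfr_mul/mpfr_clear round trips and accounts for the improved mul score reported in the commit message.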