diff options
author | zimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2014-01-23 21:27:33 +0000 |
---|---|---|
committer | zimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2014-01-23 21:27:33 +0000 |
commit | 90d49d0e9f56b127a8f83d947584e6dee8804619 (patch) | |
tree | ec1bfa47dcceed21484e557886de61b9b4e0f266 /src | |
parent | f4b465b850a582dfed113cef3242543eb19095bf (diff) | |
download | mpc-90d49d0e9f56b127a8f83d947584e6dee8804619.tar.gz |
improved version of mpfr_fmma, before we had for MPCbench:
score for mul : 178271
score for add : 2871184
score for sub : 2699846
score for div : 70527
group score Arith : 558739
score for sqrt : 93427
score for exp : 9422
score for log : 4734
score for cos : 8291
score for sin : 8308
score for acos : 2846
score for asin : 2724
group score Special : 8068
global score : 37672
Now:
score for mul : 196286
score for add : 2865480
score for sub : 2665855
score for div : 72851
group score Arith : 574896
score for sqrt : 93204
score for exp : 9432
score for log : 4720
score for cos : 8304
score for sin : 8327
score for acos : 2849
score for asin : 2731
group score Special : 8071
global score : 38075
git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1439 211d60ee-9f03-0410-a15a-8952a2c7a4e4
Diffstat (limited to 'src')
-rw-r--r-- | src/mul.c | 90 |
1 files changed, 64 insertions, 26 deletions
@@ -170,6 +170,10 @@ mul_imag (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd) return MPC_INEX (inex_re, inex_im); } +#define MPFR_MANT(x) ((x)->_mpfr_d) +#define MPFR_PREC(x) ((x)->_mpfr_prec) +#define MPFR_EXP(x) ((x)->_mpfr_exp) +#define MPFR_LIMB_SIZE(x) ((MPFR_PREC (x) - 1) / GMP_NUMB_BITS + 1) static int mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, @@ -183,33 +187,68 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, int inex; mpfr_t u, v; + mp_size_t an, bn, cn, dn; /* u=a*b, v=sign*c*d exactly */ - mpfr_init2 (u, mpfr_get_prec (a) + mpfr_get_prec (b)); - mpfr_init2 (v, mpfr_get_prec (c) + mpfr_get_prec (d)); - mpfr_mul (u, a, b, MPFR_RNDN); - mpfr_mul (v, c, d, MPFR_RNDN); - if (sign < 0) - mpfr_neg (v, v, MPFR_RNDN); + an = MPFR_LIMB_SIZE(a); + bn = MPFR_LIMB_SIZE(b); + cn = MPFR_LIMB_SIZE(c); + dn = MPFR_LIMB_SIZE(d); + MPFR_MANT(u) = malloc ((an + bn) * sizeof (mp_limb_t)); + MPFR_MANT(v) = malloc ((cn + dn) * sizeof (mp_limb_t)); + if (an >= bn) + mpn_mul (MPFR_MANT(u), MPFR_MANT(a), an, MPFR_MANT(b), bn); + else + mpn_mul (MPFR_MANT(u), MPFR_MANT(b), bn, MPFR_MANT(a), an); + if ((MPFR_MANT(u)[an + bn - 1] >> (GMP_NUMB_BITS - 1)) == 0) + { + mpn_lshift (MPFR_MANT(u), MPFR_MANT(u), an + bn, 1); + MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b) - 1; + } + else + MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b); + if (cn >= dn) + mpn_mul (MPFR_MANT(v), MPFR_MANT(c), cn, MPFR_MANT(d), dn); + else + mpn_mul (MPFR_MANT(v), MPFR_MANT(d), dn, MPFR_MANT(c), cn); + if ((MPFR_MANT(v)[cn + dn - 1] >> (GMP_NUMB_BITS - 1)) == 0) + { + mpn_lshift (MPFR_MANT(v), MPFR_MANT(v), cn + dn, 1); + MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d) - 1; + } + else + MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d); + MPFR_PREC(u) = (an + bn) * GMP_NUMB_BITS; + MPFR_PREC(v) = (cn + dn) * GMP_NUMB_BITS; + MPFR_SIGN(u) = MPFR_SIGN(a) * MPFR_SIGN(b); + if (sign > 0) + MPFR_SIGN(v) = MPFR_SIGN(c) * MPFR_SIGN(d); + else + MPFR_SIGN(v) = -MPFR_SIGN(c) * MPFR_SIGN(d); + + mpfr_check_range (u, 0, MPFR_RNDN); + mpfr_check_range (v, 0, MPFR_RNDN); /* tentatively compute z as u+v; here we need z to be distinct from a, b, c, d to not lose the latter */ inex = mpfr_add (z, u, v, rnd); - - if (mpfr_inf_p (z)) { - /* replace by "correctly rounded overflow" */ - mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN); - inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd); - } - else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) { - /* exactly u underflowed, determine inexact flag */ - inex = (mpfr_signbit (u) ? 1 : -1); - } - else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) { - /* exactly v underflowed, determine inexact flag */ - inex = (mpfr_signbit (v) ? 1 : -1); - } - else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) { + + if (!mpfr_regular_p(z) || !mpfr_regular_p(u) || !mpfr_regular_p(v)) + { + if (mpfr_inf_p (z)) { + /* replace by "correctly rounded overflow" */ + mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN); + inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd); + } + else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) { + /* exactly u underflowed, determine inexact flag */ + inex = (mpfr_signbit (u) ? 1 : -1); + } + else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) { + /* exactly v underflowed, determine inexact flag */ + inex = (mpfr_signbit (v) ? 1 : -1); + } + else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) { /* In the first case, u and v are infinities with opposite signs. In the second case, u and v are zeroes; their sum may be 0 or the least representable number, with a sign to be determined. @@ -312,15 +351,14 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_set_exp ((mpfr_ptr) c, ec); mpfr_set_exp ((mpfr_ptr) d, ed); /* works also when some of a, b, c, d are not all distinct */ - } - - mpfr_clear (u); - mpfr_clear (v); + } + } + free (MPFR_MANT(u)); + free (MPFR_MANT(v)); return inex; } - int mpc_mul_naive (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd) { |