summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2014-01-23 21:27:33 +0000
committerzimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4>2014-01-23 21:27:33 +0000
commit90d49d0e9f56b127a8f83d947584e6dee8804619 (patch)
treeec1bfa47dcceed21484e557886de61b9b4e0f266
parentf4b465b850a582dfed113cef3242543eb19095bf (diff)
downloadmpc-90d49d0e9f56b127a8f83d947584e6dee8804619.tar.gz
improved version of mpfr_fmma, before we had for MPCbench:
score for mul : 178271 score for add : 2871184 score for sub : 2699846 score for div : 70527 group score Arith : 558739 score for sqrt : 93427 score for exp : 9422 score for log : 4734 score for cos : 8291 score for sin : 8308 score for acos : 2846 score for asin : 2724 group score Special : 8068 global score : 37672 Now: score for mul : 196286 score for add : 2865480 score for sub : 2665855 score for div : 72851 group score Arith : 574896 score for sqrt : 93204 score for exp : 9432 score for log : 4720 score for cos : 8304 score for sin : 8327 score for acos : 2849 score for asin : 2731 group score Special : 8071 global score : 38075 git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1439 211d60ee-9f03-0410-a15a-8952a2c7a4e4
-rw-r--r--src/mul.c90
1 files changed, 64 insertions, 26 deletions
diff --git a/src/mul.c b/src/mul.c
index 73c581d..3c9c0a7 100644
--- a/src/mul.c
+++ b/src/mul.c
@@ -170,6 +170,10 @@ mul_imag (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd)
return MPC_INEX (inex_re, inex_im);
}
+#define MPFR_MANT(x) ((x)->_mpfr_d)
+#define MPFR_PREC(x) ((x)->_mpfr_prec)
+#define MPFR_EXP(x) ((x)->_mpfr_exp)
+#define MPFR_LIMB_SIZE(x) ((MPFR_PREC (x) - 1) / GMP_NUMB_BITS + 1)
static int
mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
@@ -183,33 +187,68 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
int inex;
mpfr_t u, v;
+ mp_size_t an, bn, cn, dn;
/* u=a*b, v=sign*c*d exactly */
- mpfr_init2 (u, mpfr_get_prec (a) + mpfr_get_prec (b));
- mpfr_init2 (v, mpfr_get_prec (c) + mpfr_get_prec (d));
- mpfr_mul (u, a, b, MPFR_RNDN);
- mpfr_mul (v, c, d, MPFR_RNDN);
- if (sign < 0)
- mpfr_neg (v, v, MPFR_RNDN);
+ an = MPFR_LIMB_SIZE(a);
+ bn = MPFR_LIMB_SIZE(b);
+ cn = MPFR_LIMB_SIZE(c);
+ dn = MPFR_LIMB_SIZE(d);
+ MPFR_MANT(u) = malloc ((an + bn) * sizeof (mp_limb_t));
+ MPFR_MANT(v) = malloc ((cn + dn) * sizeof (mp_limb_t));
+ if (an >= bn)
+ mpn_mul (MPFR_MANT(u), MPFR_MANT(a), an, MPFR_MANT(b), bn);
+ else
+ mpn_mul (MPFR_MANT(u), MPFR_MANT(b), bn, MPFR_MANT(a), an);
+ if ((MPFR_MANT(u)[an + bn - 1] >> (GMP_NUMB_BITS - 1)) == 0)
+ {
+ mpn_lshift (MPFR_MANT(u), MPFR_MANT(u), an + bn, 1);
+ MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b) - 1;
+ }
+ else
+ MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b);
+ if (cn >= dn)
+ mpn_mul (MPFR_MANT(v), MPFR_MANT(c), cn, MPFR_MANT(d), dn);
+ else
+ mpn_mul (MPFR_MANT(v), MPFR_MANT(d), dn, MPFR_MANT(c), cn);
+ if ((MPFR_MANT(v)[cn + dn - 1] >> (GMP_NUMB_BITS - 1)) == 0)
+ {
+ mpn_lshift (MPFR_MANT(v), MPFR_MANT(v), cn + dn, 1);
+ MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d) - 1;
+ }
+ else
+ MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d);
+ MPFR_PREC(u) = (an + bn) * GMP_NUMB_BITS;
+ MPFR_PREC(v) = (cn + dn) * GMP_NUMB_BITS;
+ MPFR_SIGN(u) = MPFR_SIGN(a) * MPFR_SIGN(b);
+ if (sign > 0)
+ MPFR_SIGN(v) = MPFR_SIGN(c) * MPFR_SIGN(d);
+ else
+ MPFR_SIGN(v) = -MPFR_SIGN(c) * MPFR_SIGN(d);
+
+ mpfr_check_range (u, 0, MPFR_RNDN);
+ mpfr_check_range (v, 0, MPFR_RNDN);
/* tentatively compute z as u+v; here we need z to be distinct
from a, b, c, d to not lose the latter */
inex = mpfr_add (z, u, v, rnd);
-
- if (mpfr_inf_p (z)) {
- /* replace by "correctly rounded overflow" */
- mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN);
- inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
- }
- else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
- /* exactly u underflowed, determine inexact flag */
- inex = (mpfr_signbit (u) ? 1 : -1);
- }
- else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
- /* exactly v underflowed, determine inexact flag */
- inex = (mpfr_signbit (v) ? 1 : -1);
- }
- else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
+
+ if (!mpfr_regular_p(z) || !mpfr_regular_p(u) || !mpfr_regular_p(v))
+ {
+ if (mpfr_inf_p (z)) {
+ /* replace by "correctly rounded overflow" */
+ mpfr_set_si (z, (mpfr_signbit (z) ? -1 : 1), MPFR_RNDN);
+ inex = mpfr_mul_2ui (z, z, mpfr_get_emax (), rnd);
+ }
+ else if (mpfr_zero_p (u) && !mpfr_zero_p (v)) {
+ /* exactly u underflowed, determine inexact flag */
+ inex = (mpfr_signbit (u) ? 1 : -1);
+ }
+ else if (mpfr_zero_p (v) && !mpfr_zero_p (u)) {
+ /* exactly v underflowed, determine inexact flag */
+ inex = (mpfr_signbit (v) ? 1 : -1);
+ }
+ else if (mpfr_nan_p (z) || (mpfr_zero_p (u) && mpfr_zero_p (v))) {
/* In the first case, u and v are infinities with opposite signs.
In the second case, u and v are zeroes; their sum may be 0 or the
least representable number, with a sign to be determined.
@@ -312,15 +351,14 @@ mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
mpfr_set_exp ((mpfr_ptr) c, ec);
mpfr_set_exp ((mpfr_ptr) d, ed);
/* works also when some of a, b, c, d are not all distinct */
- }
-
- mpfr_clear (u);
- mpfr_clear (v);
+ }
+ }
+ free (MPFR_MANT(u));
+ free (MPFR_MANT(v));
return inex;
}
-
int
mpc_mul_naive (mpc_ptr z, mpc_srcptr x, mpc_srcptr y, mpc_rnd_t rnd)
{