diff options
author | zimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2012-06-26 13:47:24 +0000 |
---|---|---|
committer | zimmerma <zimmerma@211d60ee-9f03-0410-a15a-8952a2c7a4e4> | 2012-06-26 13:47:24 +0000 |
commit | 5a0dbb6b0f9b195e4b6c93e0b8eb31100002ed56 (patch) | |
tree | d2663d82c993ed5d12a86006d9676b5a3742d38e /src | |
parent | baab7fa262e0bc5dc5faaf146a35eb8de6f4ebb1 (diff) | |
download | mpc-5a0dbb6b0f9b195e4b6c93e0b8eb31100002ed56.tar.gz |
[fma.c] new code (hopefully faster?) after discussion with Andreas Enge,
written with Benjamin Dadoun
git-svn-id: svn://scm.gforge.inria.fr/svn/mpc/trunk@1164 211d60ee-9f03-0410-a15a-8952a2c7a4e4
Diffstat (limited to 'src')
-rw-r--r-- | src/fma.c | 63 |
1 files changed, 61 insertions, 2 deletions
@@ -39,8 +39,8 @@ bound_prec_addsub (mpfr_srcptr x, mpfr_srcptr y) } /* r <- a*b+c */ -int -mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) +static int +mpc_fma_naive (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) { mpfr_t rea_reb, rea_imb, ima_reb, ima_imb, tmp; mpfr_prec_t pre12, pre13, pre23, pim12, pim13, pim23; @@ -130,3 +130,62 @@ mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) return MPC_INEX(inex_re, inex_im); } +/* The algorithm is as follows: + - in a first pass, we use the target precision + some extra bits + - if it fails, we add the number of cancelled bits when adding + Re(a*b) and Re(c) [similarly for the imaginary part] + - it is fails again, we call the mpc_fma_naive function, which also + deals with the special cases */ +int +mpc_fma (mpc_ptr r, mpc_srcptr a, mpc_srcptr b, mpc_srcptr c, mpc_rnd_t rnd) +{ + mpc_t ab; + mpfr_prec_t pre, pim, wpre, wpim; + mpfr_exp_t diffre, diffim; + int i, inex, okre = 0, okim = 0; + + if (mpc_fin_p (a) == 0 || mpc_fin_p (b) == 0 || mpc_fin_p (c) == 0) + return mpc_fma_naive (r, a, b, c, rnd); + + pre = mpfr_get_prec (mpc_realref(r)); + pim = mpfr_get_prec (mpc_imagref(r)); + wpre = pre + mpc_ceil_log2 (pre) + 10; + wpim = pim + mpc_ceil_log2 (pim) + 10; + mpc_init3 (ab, wpre, wpim); + for (i = 0; i < 2; ++i) + { + mpc_mul (ab, a, b, MPC_RNDZZ); + if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab))) + break; + diffre = mpfr_get_exp (mpc_realref(ab)); + diffim = mpfr_get_exp (mpc_imagref(ab)); + if (mpfr_zero_p (mpc_realref(ab)) || mpfr_zero_p (mpc_imagref(ab))) + break; + mpc_add (ab, ab, c, MPC_RNDZZ); + diffre -= mpfr_get_exp (mpc_realref(ab)); + diffim -= mpfr_get_exp (mpc_imagref(ab)); + diffre = (diffre > 0 ? diffre + 1 : 1); + diffim = (diffim > 0 ? diffim + 1 : 1); + okre = diffre > wpre ? 0 : mpfr_can_round (mpc_realref(ab), + wpre - diffre, GMP_RNDZ, GMP_RNDZ, + pre + (MPC_RND_RE (rnd) == GMP_RNDN)); + okim = diffim > wpim ? 0 : mpfr_can_round (mpc_imagref(ab), + wpim - diffim, GMP_RNDZ, GMP_RNDZ, + pim + (MPC_RND_IM (rnd) == GMP_RNDN)); + if (okre && okim) + { + inex = mpc_set (r, ab, rnd); + break; + } + if (i == 1) + break; + if (okre == 0 && diffre > 1) + wpre += diffre; + if (okim == 0 && diffim > 1) + wpim += diffim; + mpfr_set_prec (mpc_realref(ab), wpre); + mpfr_set_prec (mpc_imagref(ab), wpim); + } + mpc_clear (ab); + return okre && okim ? inex : mpc_fma_naive (r, a, b, c, rnd); +} |