diff options
author | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-02-21 14:06:56 +0000 |
---|---|---|
committer | zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-02-21 14:06:56 +0000 |
commit | a1284ad4d015f28a2f37d284df00c7374aec802c (patch) | |
tree | 7d262b55646cf2132fa94f48ca02fb706069243e | |
parent | 4867ceee9b32a4c46091b347764f28bdda693ef0 (diff) | |
download | mpfr-a1284ad4d015f28a2f37d284df00c7374aec802c.tar.gz |
[src/fmma.c] speedup of mpfr_fmma and mpfr_fmms
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11332 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r-- | src/fmma.c | 16 | ||||
-rw-r--r-- | src/set.c | 165 | ||||
-rw-r--r-- | src/ubf.c | 32 |
3 files changed, 130 insertions, 83 deletions
diff --git a/src/fmma.c b/src/fmma.c index 9198ccefa..6168c2af0 100644 --- a/src/fmma.c +++ b/src/fmma.c @@ -27,8 +27,10 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_srcptr d, mpfr_rnd_t rnd, int neg) { mpfr_ubf_t u, v; + mpfr_t zz; + mpfr_prec_t prec_z = MPFR_PREC(z); mp_size_t un, vn; - mpfr_limb_ptr up, vp; + mpfr_limb_ptr up, vp, zp; int inex; MPFR_TMP_DECL(marker); @@ -52,7 +54,17 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_ubf_mul_exact (v, c, d); if (neg) MPFR_CHANGE_SIGN (v); - inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd); + if (prec_z == MPFR_PREC(a) && prec_z == MPFR_PREC(b) && + prec_z == MPFR_PREC(c) && prec_z == MPFR_PREC(d) && + un == MPFR_PREC2LIMBS(2 * prec_z)) + { + MPFR_TMP_INIT (zp, zz, 2 * prec_z, un); + MPFR_PREC(u) = MPFR_PREC(v) = 2 * prec_z; + inex = mpfr_add (zz, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd); + inex = mpfr_set_1_2 (z, zz, rnd, inex); + } + else + inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd); MPFR_UBF_CLEAR_EXP (u); MPFR_UBF_CLEAR_EXP (v); @@ -80,9 +80,15 @@ mpfr_abs (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode) return mpfr_set4 (a, b, rnd_mode, MPFR_SIGN_POS); } -/* Round (u, inex) into s with rounding mode rnd, - where inex is the ternary value associated to u with the same rounding mode. - Assumes PREC(u) = 2*PREC(s), and PREC(s) < GMP_NUMB_LIMBS. */ +/* Round (u, inex) into s with rounding mode rnd, where inex is the ternary + value associated to u with the *same* rounding mode. + Assumes PREC(u) = 2*PREC(s). + The main algorithm is the following: + rnd=RNDZ: inex2 = mpfr_set (s, u, rnd_mode); return inex2 | inex; + (a negative value, if any, is preserved in inex2 | inex) + rnd=RNDA: idem + rnd=RNDN: inex2 = mpfr_set (s, u, rnd_mode); + if (inex2) return inex2; else return inex; */ int mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex) { @@ -92,89 +98,102 @@ mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex) mp_limb_t *sp = MPFR_MANT(s); mp_limb_t *up = MPFR_MANT(u); mp_limb_t mask; - int abs_inex; + int inex2; - MPFR_ASSERTD(MPFR_PREC(s) < GMP_NUMB_BITS); - MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s)); + if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u))) + { + mpfr_set (s, u, rnd_mode); + return inex; + } - mask = MPFR_LIMB_MASK(sh); + MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s)); - if (MPFR_PREC(u) <= GMP_NUMB_BITS) + if (MPFR_PREC(s) < GMP_NUMB_BITS) { - mp_limb_t u0 = up[0]; + mask = MPFR_LIMB_MASK(sh); - /* it suffices to round (u0, inex) */ - rb = u0 & (MPFR_LIMB_ONE << (sh - 1)); - sb = (u0 & mask) ^ rb; - sp[0] = u0 & ~mask; - } - else - { - mp_limb_t u1 = up[1]; + if (MPFR_PREC(u) <= GMP_NUMB_BITS) + { + mp_limb_t u0 = up[0]; - /* we need to round (u1, u0, inex) */ - mask = MPFR_LIMB_MASK(sh); - rb = u1 & (MPFR_LIMB_ONE << (sh - 1)); - sb = ((u1 & mask) ^ rb) | up[0]; - sp[0] = u1 & ~mask; - } + /* it suffices to round (u0, inex) */ + rb = u0 & (MPFR_LIMB_ONE << (sh - 1)); + sb = (u0 & mask) ^ rb; + sp[0] = u0 & ~mask; + } + else + { + mp_limb_t u1 = up[1]; + + /* we need to round (u1, u0, inex) */ + mask = MPFR_LIMB_MASK(sh); + rb = u1 & (MPFR_LIMB_ONE << (sh - 1)); + sb = ((u1 & mask) ^ rb) | up[0]; + sp[0] = u1 & ~mask; + } - abs_inex = inex * MPFR_SIGN(u); - MPFR_SIGN(s) = MPFR_SIGN(u); - MPFR_EXP(s) = MPFR_EXP(u); + inex2 = inex * MPFR_SIGN(u); + MPFR_SIGN(s) = MPFR_SIGN(u); + MPFR_EXP(s) = MPFR_EXP(u); - /* in case abs_inex > 0, the value of u is rounded away, - thus we need to subtract something from (u0, rb, sb): - (a) if sb is not zero, since the subtracted value is < 1, we can leave + /* in case inex2 > 0, the value of u is rounded away, + thus we need to subtract something from (u0, rb, sb): + (a) if sb is not zero, since the subtracted value is < 1, we can leave sb as it is; - (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1 - (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */ - if (abs_inex > 0) - { - if (rb && sb == 0) + (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1 + (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */ + if (inex2 > 0) { - rb = 0; - sb = 1; + if (rb && sb == 0) + { + rb = 0; + sb = 1; + } } - } - else /* abs_inex <= 0 */ - sb |= inex; + else /* inex2 <= 0 */ + sb |= inex; - /* now rb, sb are the correct round and sticky bits, together with the value - of sp[0], except possibly in the case rb = sb = 0 and abs_inex > 0 */ - if (rb == 0 && sb == 0) - { - if (abs_inex <= 0) - MPFR_RET(0); - else /* abs_inex > 0 can only occur for RNDN and RNDA: - RNDN: return sp[0] and inex - RNDA: return sp[0] and inex */ - MPFR_RET(inex); - } - else if (rnd_mode == MPFR_RNDN) - { - if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0)) - goto truncate; - else - goto add_one_ulp; - } - else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s))) - { - truncate: - MPFR_RET(-MPFR_SIGN(s)); - } - else /* round away from zero */ - { - add_one_ulp: - sp[0] += MPFR_LIMB_ONE << sh; - if (MPFR_UNLIKELY(sp[0] == 0)) + /* now rb, sb are the round and sticky bits, together with the value of + sp[0], except possibly in the case rb = sb = 0 and inex2 > 0 */ + if (rb == 0 && sb == 0) + { + if (inex2 <= 0) + MPFR_RET(0); + else /* inex2 > 0 can only occur for RNDN and RNDA: + RNDN: return sp[0] and inex + RNDA: return sp[0] and inex */ + MPFR_RET(inex); + } + else if (rnd_mode == MPFR_RNDN) + { + if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0)) + goto truncate; + else + goto add_one_ulp; + } + else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s))) + { + truncate: + MPFR_RET(-MPFR_SIGN(s)); + } + else /* round away from zero */ { - sp[0] = MPFR_LIMB_HIGHBIT; - if (MPFR_EXP(s) + 1 <= __gmpfr_emax) - MPFR_SET_EXP (s, MPFR_EXP(s) + 1); - else /* overflow */ - return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s)); + add_one_ulp: + sp[0] += MPFR_LIMB_ONE << sh; + if (MPFR_UNLIKELY(sp[0] == 0)) + { + sp[0] = MPFR_LIMB_HIGHBIT; + if (MPFR_EXP(s) + 1 <= __gmpfr_emax) + MPFR_SET_EXP (s, MPFR_EXP(s) + 1); + else /* overflow */ + return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s)); + } + MPFR_RET(MPFR_SIGN(s)); } - MPFR_RET(MPFR_SIGN(s)); } + + /* general case PREC(s) >= GMP_NUMB_BITS */ + inex2 = mpfr_set (s, u, rnd_mode); + return (rnd_mode != MPFR_RNDN) ? inex | inex2 + : (inex2) ? inex2 : inex; } @@ -20,6 +20,7 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ +#define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" /* Note: In MPFR math functions, even if UBF code is not called first, @@ -128,17 +129,32 @@ mpfr_ubf_mul_exact (mpfr_ubf_ptr a, mpfr_srcptr b, mpfr_srcptr c) ap = MPFR_MANT (a); - u = (bn >= cn) ? - mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) : - mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn); - if (MPFR_UNLIKELY (MPFR_LIMB_MSB (u) == 0)) + if (bn == 1 && cn == 1) { - m = 1; - MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1)); - MPFR_ASSERTD (v == 0); + umul_ppmm (ap[1], ap[0], MPFR_MANT(b)[0], MPFR_MANT(c)[0]); + if (ap[1] & MPFR_LIMB_HIGHBIT) + m = 0; + else + { + ap[1] = (ap[1] << 1) | (ap[0] >> (GMP_NUMB_BITS - 1)); + ap[0] = ap[0] << 1; + m = 1; + } } else - m = 0; + { + u = (bn >= cn) ? + mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) : + mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn); + if (MPFR_LIMB_MSB (u) == 0) + { + m = 1; + MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1)); + MPFR_ASSERTD (v == 0); + } + else + m = 0; + } if (! MPFR_IS_UBF (b) && ! MPFR_IS_UBF (c) && (e = MPFR_GET_EXP (b) + MPFR_GET_EXP (c) - m, |