diff options
author | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 08:38:17 +0000 |
---|---|---|
committer | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 08:38:17 +0000 |
commit | 87ff38458263c9a9ed79a7ebd547fd32a66ae843 (patch) | |
tree | c8f4ed14e7e8d301fb3e00ba8ad43412733f2c5f /src/add1sp.c | |
parent | 0fb11bc4c54b2428a02016f68eae553c089f95ae (diff) | |
download | mpfr-87ff38458263c9a9ed79a7ebd547fd32a66ae843.tar.gz |
Merged r11198-11280 from the trunk; no conflicts but additional changes:
* About r11271 (src/add1sp.c), which introduces new special code
(function mpfr_add1sp1n), handle MPFR_RNDF in the same way as done
in similar existing special code (mpfr_add1sp1 and mpfr_add1sp2).
* In mpfr_add1sp3, do the same thing (this should have been done in
r11172, where this function was introduced via a merge).
* About r11279 (src/sub1sp.c, tests/tsub1sp.c), which introduces new
special code (function mpfr_sub1sp1n), do the same thing.
In tests/tsub1sp.c, s/RND_LOOP/RND_LOOP_NO_RNDF/ as usual to avoid
a failure.
* Note: concerning mpfr_sub1sp3, RNDF support was added at the same
time as the merge in r11179.
* Some style changes related to RNDF, in particular for consistency.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/branches/faithful@11455 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/add1sp.c')
-rw-r--r-- | src/add1sp.c | 124 |
1 file changed, 118 insertions, 6 deletions
diff --git a/src/add1sp.c b/src/add1sp.c index 90b53ed28..be0e5529a 100644 --- a/src/add1sp.c +++ b/src/add1sp.c @@ -123,10 +123,12 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, if (bx == cx) { - /* TODO: a0 = (bp[0] >> 1) + (cp[0] >> 1) is probably better - (no long constant to load in a register). */ - /* since bp[0], cp[0] >= MPFR_LIMB_HIGHBIT, a carry always occurs */ - a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1); + /* The following line is probably better than + a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1); + as it has less dependency and doesn't need a long constant on some + processors. On ARM, it can also probably benefit from shift-and-op + in a better way. Timings cannot be conclusive. */ + a0 = (bp[0] >> 1) + (cp[0] >> 1); bx ++; rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); ap[0] = a0 ^ rb; @@ -194,7 +196,7 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); MPFR_SET_EXP (a, bx); - if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF)) + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET(0); else if (rnd_mode == MPFR_RNDN) { @@ -227,6 +229,111 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, } } +/* same as mpfr_add1sp, but for p = GMP_NUMB_BITS */ +static int +mpfr_add1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) +{ + mpfr_exp_t bx = MPFR_GET_EXP (b); + mpfr_exp_t cx = MPFR_GET_EXP (c); + mp_limb_t *ap = MPFR_MANT(a); + mp_limb_t *bp = MPFR_MANT(b); + mp_limb_t *cp = MPFR_MANT(c); + mp_limb_t rb; /* round bit */ + mp_limb_t sb; /* sticky bit */ + mp_limb_t a0; /* to store the result */ + mpfr_uexp_t d; + + MPFR_ASSERTD(MPFR_PREC (a) == GMP_NUMB_BITS); + MPFR_ASSERTD(MPFR_PREC (b) == GMP_NUMB_BITS); + MPFR_ASSERTD(MPFR_PREC (c) == GMP_NUMB_BITS); + + if (bx == cx) + { + a0 = bp[0] + cp[0]; + rb = a0 & MPFR_LIMB_ONE; + ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1); + bx ++; + sb = 0; /* 
since b + c fits on p+1 bits, the sticky bit is zero */ + } + else if (bx > cx) + { + BGreater1: + d = (mpfr_uexp_t) bx - cx; + if (d < GMP_NUMB_BITS) /* 1 <= d < GMP_NUMB_BITS */ + { + a0 = bp[0] + (cp[0] >> d); + sb = cp[0] << (GMP_NUMB_BITS - d); /* bits from cp[-1] after shift */ + if (a0 < bp[0]) /* carry */ + { + ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1); + rb = a0 & 1; + bx ++; + } + else /* no carry */ + { + ap[0] = a0; + rb = sb & MPFR_LIMB_HIGHBIT; + sb &= ~MPFR_LIMB_HIGHBIT; + } + } + else /* d >= GMP_NUMB_BITS */ + { + sb = d != GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT; + ap[0] = bp[0]; + rb = d == GMP_NUMB_BITS; + } + } + else /* bx < cx: swap b and c */ + { + mpfr_exp_t tx; + mp_limb_t *tp; + tx = bx; bx = cx; cx = tx; + tp = bp; bp = cp; cp = tp; + goto BGreater1; + } + + /* Note: we could keep the output significand in a0 for the rounding, + and only store it in ap[0] at the very end, but this seems slower + on average (but better for the worst case). */ + + /* now perform rounding */ + if (MPFR_UNLIKELY(bx > __gmpfr_emax)) + return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); + + MPFR_SET_EXP (a, bx); + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) + MPFR_RET(0); + else if (rnd_mode == MPFR_RNDN) + { + /* the condition below should be rb == 0 || (rb != 0 && ...), but this + is equivalent to rb == 0 || (...) 
*/ + if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0)) + goto truncate; + else + goto add_one_ulp; + } + else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a))) + { + truncate: + MPFR_RET(-MPFR_SIGN(a)); + } + else /* round away from zero */ + { + add_one_ulp: + ap[0] += MPFR_LIMB_ONE; + if (MPFR_UNLIKELY(ap[0] == 0)) + { + ap[0] = MPFR_LIMB_HIGHBIT; + /* no need to have MPFR_LIKELY here, since we are in a rare branch */ + if (bx + 1 <= __gmpfr_emax) + MPFR_SET_EXP (a, bx + 1); + else /* overflow */ + return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); + } + MPFR_RET(MPFR_SIGN(a)); + } +} + /* same as mpfr_add1sp, but for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */ static int mpfr_add1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, @@ -474,7 +581,7 @@ mpfr_add1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); MPFR_SET_EXP (a, bx); - if (rb == 0 && sb == 0) + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET(0); else if (rnd_mode == MPFR_RNDN) { @@ -541,6 +648,11 @@ mpfr_add1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS) return mpfr_add1sp2 (a, b, c, rnd_mode, p); + /* we put this test after the mpfr_add1sp2() call, to avoid slowing down + the more frequent case GMP_NUMB_BITS < p < 2 * GMP_NUMB_BITS */ + if (p == GMP_NUMB_BITS) + return mpfr_add1sp1n (a, b, c, rnd_mode); + if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS) return mpfr_add1sp3 (a, b, c, rnd_mode, p); |