author     vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>  2017-05-04 08:38:17 +0000
committer  vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>  2017-05-04 08:38:17 +0000
commit     87ff38458263c9a9ed79a7ebd547fd32a66ae843 (patch)
tree       c8f4ed14e7e8d301fb3e00ba8ad43412733f2c5f /src/add1sp.c
parent     0fb11bc4c54b2428a02016f68eae553c089f95ae (diff)
download   mpfr-87ff38458263c9a9ed79a7ebd547fd32a66ae843.tar.gz
Merged r11198-11280 from the trunk; no conflicts but additional changes:
* About r11271 (src/add1sp.c), which introduces new special code (function
  mpfr_add1sp1n), handle MPFR_RNDF in the same way as done in similar
  existing special code (mpfr_add1sp1 and mpfr_add1sp2).
* In mpfr_add1sp3, do the same thing (this should have been done in r11172,
  where this function was introduced via a merge).
* About r11279 (src/sub1sp.c, tests/tsub1sp.c), which introduces new special
  code (function mpfr_sub1sp1n), do the same thing. In tests/tsub1sp.c,
  s/RND_LOOP/RND_LOOP_NO_RNDF/ as usual to avoid a failure.
* Note: concerning mpfr_sub1sp3, RNDF support was added at the same time as
  the merge in r11179.
* Some style changes related to RNDF, in particular for consistency.

git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/branches/faithful@11455 280ebfd0-de03-0410-8827-d642c229c3f4
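For reference, the MPFR_RNDF handling mentioned above follows one pattern in every special-case function: once the round bit rb and the sticky bit sb are known, faithful rounding may return the truncated result immediately, since either of the two enclosing representable values is acceptable. Below is a minimal sketch of that shared rounding tail (plain C; the names and the collapsed rounding modes are illustrative, not MPFR internals; directional modes are reduced to RNDZ/RNDA using the sign, in the spirit of MPFR_IS_LIKE_RNDZ):

/* ap0[0] is the result limb; rb/sb are the round and sticky bits.
   Returns the ternary value: 0 for an exact result or for RNDF. */
enum rnd { RNDN, RNDZ, RNDA, RNDF };

int round_tail (unsigned long *ap0, unsigned long rb, unsigned long sb,
                enum rnd rnd, int sign)
{
  if ((rb == 0 && sb == 0) || rnd == RNDF)
    return 0;            /* exact, or RNDF: truncation is already faithful */
  if (rnd == RNDZ
      || (rnd == RNDN && (rb == 0 || (sb == 0 && (ap0[0] & 1) == 0))))
    return -sign;        /* truncate (RNDN: low round bit, or even tie) */
  ap0[0] += 1;           /* round away from zero: add one ulp */
  return sign;           /* (carry out of the limb not handled here) */
}

int main (void)
{
  unsigned long a0 = 42;  /* even: a tie under RNDN truncates, ternary -1 */
  return round_tail (&a0, 1, 0, RNDN, 1) == -1 ? 0 : 1;
}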
Diffstat (limited to 'src/add1sp.c')
-rw-r--r--   src/add1sp.c   124
1 file changed, 118 insertions, 6 deletions
diff --git a/src/add1sp.c b/src/add1sp.c
index 90b53ed28..be0e5529a 100644
--- a/src/add1sp.c
+++ b/src/add1sp.c
@@ -123,10 +123,12 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
if (bx == cx)
{
- /* TODO: a0 = (bp[0] >> 1) + (cp[0] >> 1) is probably better
- (no long constant to load in a register). */
- /* since bp[0], cp[0] >= MPFR_LIMB_HIGHBIT, a carry always occurs */
- a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1);
+ /* The following line is probably better than
+ a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1);
+ as it has less dependency and doesn't need a long constant on some
+ processors. On ARM, it can also probably benefit from shift-and-op
+ in a better way. Timings cannot be conclusive. */
+ a0 = (bp[0] >> 1) + (cp[0] >> 1);
bx ++;
rb = a0 & (MPFR_LIMB_ONE << (sh - 1));
ap[0] = a0 ^ rb;
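The equivalence behind the new comment deserves a note: in mpfr_add1sp1 we have p < GMP_NUMB_BITS, so the low sh = GMP_NUMB_BITS - p >= 1 bits of bp[0] and cp[0] are zero. Both limbs are therefore even, so no bit is lost by shifting each one before the addition, and their high bits guarantee that the 65-bit sum always carries, which makes the two formulas compute the same limb. A quick standalone check of the identity (illustration only, not MPFR code):

#include <assert.h>
#include <stdint.h>

int main (void)
{
  const uint64_t HIGHBIT = (uint64_t) 1 << 63;
  uint64_t b0, c0, x = 0x9e3779b97f4a7c15u;
  int i;
  for (i = 0; i < 1000000; i++)
    {
      /* xorshift64 PRNG; force the top bit on and the low bit off,
         as for a significand limb with sh >= 1 trailing zero bits */
      x ^= x << 13; x ^= x >> 7; x ^= x << 17;
      b0 = (x | HIGHBIT) & ~(uint64_t) 1;
      x ^= x << 13; x ^= x >> 7; x ^= x << 17;
      c0 = (x | HIGHBIT) & ~(uint64_t) 1;
      assert ((b0 >> 1) + (c0 >> 1) == (HIGHBIT | ((b0 + c0) >> 1)));
    }
  return 0;
}

This also shows why mpfr_add1sp1n below cannot use the same trick: with p = GMP_NUMB_BITS there is no guaranteed trailing zero bit, so the bit shifted out of bp[0] + cp[0] must be kept as the round bit.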
@@ -194,7 +196,7 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
MPFR_SET_EXP (a, bx);
- if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF))
+ if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
MPFR_RET(0);
else if (rnd_mode == MPFR_RNDN)
{
@@ -227,6 +229,111 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
}
}
+/* same as mpfr_add1sp, but for p = GMP_NUMB_BITS */
+static int
+mpfr_add1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
+{
+ mpfr_exp_t bx = MPFR_GET_EXP (b);
+ mpfr_exp_t cx = MPFR_GET_EXP (c);
+ mp_limb_t *ap = MPFR_MANT(a);
+ mp_limb_t *bp = MPFR_MANT(b);
+ mp_limb_t *cp = MPFR_MANT(c);
+ mp_limb_t rb; /* round bit */
+ mp_limb_t sb; /* sticky bit */
+ mp_limb_t a0; /* to store the result */
+ mpfr_uexp_t d;
+
+ MPFR_ASSERTD(MPFR_PREC (a) == GMP_NUMB_BITS);
+ MPFR_ASSERTD(MPFR_PREC (b) == GMP_NUMB_BITS);
+ MPFR_ASSERTD(MPFR_PREC (c) == GMP_NUMB_BITS);
+
+ if (bx == cx)
+ {
+ a0 = bp[0] + cp[0];
+ rb = a0 & MPFR_LIMB_ONE;
+ ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1);
+ bx ++;
+ sb = 0; /* since b + c fits on p+1 bits, the sticky bit is zero */
+ }
+ else if (bx > cx)
+ {
+ BGreater1:
+ d = (mpfr_uexp_t) bx - cx;
+ if (d < GMP_NUMB_BITS) /* 1 <= d < GMP_NUMB_BITS */
+ {
+ a0 = bp[0] + (cp[0] >> d);
+ sb = cp[0] << (GMP_NUMB_BITS - d); /* low d bits of cp[0], shifted out by cp[0] >> d */
+ if (a0 < bp[0]) /* carry */
+ {
+ ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1);
+ rb = a0 & 1;
+ bx ++;
+ }
+ else /* no carry */
+ {
+ ap[0] = a0;
+ rb = sb & MPFR_LIMB_HIGHBIT;
+ sb &= ~MPFR_LIMB_HIGHBIT;
+ }
+ }
+ else /* d >= GMP_NUMB_BITS */
+ {
+ sb = d != GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT;
+ ap[0] = bp[0];
+ rb = d == GMP_NUMB_BITS;
+ }
+ }
+ else /* bx < cx: swap b and c */
+ {
+ mpfr_exp_t tx;
+ mp_limb_t *tp;
+ tx = bx; bx = cx; cx = tx;
+ tp = bp; bp = cp; cp = tp;
+ goto BGreater1;
+ }
+
+ /* Note: we could keep the output significand in a0 for the rounding,
+ and only store it in ap[0] at the very end, but this seems slower
+ on average (but better for the worst case). */
+
+ /* now perform rounding */
+ if (MPFR_UNLIKELY(bx > __gmpfr_emax))
+ return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
+
+ MPFR_SET_EXP (a, bx);
+ if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
+ MPFR_RET(0);
+ else if (rnd_mode == MPFR_RNDN)
+ {
+ /* the condition below should be rb == 0 || (rb != 0 && ...), but this
+ is equivalent to rb == 0 || (...) */
+ if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0))
+ goto truncate;
+ else
+ goto add_one_ulp;
+ }
+ else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a)))
+ {
+ truncate:
+ MPFR_RET(-MPFR_SIGN(a));
+ }
+ else /* round away from zero */
+ {
+ add_one_ulp:
+ ap[0] += MPFR_LIMB_ONE;
+ if (MPFR_UNLIKELY(ap[0] == 0))
+ {
+ ap[0] = MPFR_LIMB_HIGHBIT;
+ /* no need to have MPFR_LIKELY here, since we are in a rare branch */
+ if (bx + 1 <= __gmpfr_emax)
+ MPFR_SET_EXP (a, bx + 1);
+ else /* overflow */
+ return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
+ }
+ MPFR_RET(MPFR_SIGN(a));
+ }
+}
+
/* same as mpfr_add1sp, but for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */
static int
mpfr_add1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
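On a build with 64-bit limbs, the new function is reached through the public API with precision exactly GMP_NUMB_BITS. The sketch below (an assumed setup, using only documented MPFR calls) creates a tie that should exercise the d == GMP_NUMB_BITS branch above: with b = 1 and c = 2^-64 we get rb = 1 and sb = 0, and RNDN rounds to the even value 1 with a negative ternary value.

#include <stdio.h>
#include <mpfr.h>

int main (void)
{
  mpfr_t a, b, c;
  int inex;
  mpfr_inits2 (64, a, b, c, (mpfr_ptr) 0);
  mpfr_set_ui (b, 1, MPFR_RNDN);
  mpfr_set_ui_2exp (c, 1, -64, MPFR_RNDN);  /* c = 2^-64, half an ulp of 1 */
  inex = mpfr_add (a, b, c, MPFR_RNDN);     /* tie: rounds to 1, inex < 0 */
  mpfr_printf ("a = %.20Rg, ternary = %d\n", a, inex);
  mpfr_clears (a, b, c, (mpfr_ptr) 0);
  return 0;
}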
@@ -474,7 +581,7 @@ mpfr_add1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode,
return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a));
MPFR_SET_EXP (a, bx);
- if (rb == 0 && sb == 0)
+ if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF)
MPFR_RET(0);
else if (rnd_mode == MPFR_RNDN)
{
@@ -541,6 +648,11 @@ mpfr_add1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode)
if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS)
return mpfr_add1sp2 (a, b, c, rnd_mode, p);
+ /* we put this test after the mpfr_add1sp2() call, to avoid slowing down
+ the more frequent case GMP_NUMB_BITS < p < 2 * GMP_NUMB_BITS */
+ if (p == GMP_NUMB_BITS)
+ return mpfr_add1sp1n (a, b, c, rnd_mode);
+
if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS)
return mpfr_add1sp3 (a, b, c, rnd_mode, p);
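Finally, the ordering note above can be read together with the rest of the dispatch: the p == GMP_NUMB_BITS test comes after the more frequent GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS case, and precisions that are larger multiples of the limb size (2*GMP_NUMB_BITS, ...) still fall through to the generic code. A small illustration mirroring those tests (the 64-bit limb size and the p < GMP_NUMB_BITS case handled by mpfr_add1sp1 are assumptions taken from the surrounding source, not shown in this hunk):

#include <stdio.h>

#define LIMB_BITS 64  /* stands in for GMP_NUMB_BITS */

/* Mirrors the order of the precision tests in mpfr_add1sp. */
static const char *dispatch (unsigned long p)
{
  if (p < LIMB_BITS)                           return "mpfr_add1sp1";
  if (LIMB_BITS < p && p < 2 * LIMB_BITS)      return "mpfr_add1sp2";
  if (p == LIMB_BITS)                          return "mpfr_add1sp1n";
  if (2 * LIMB_BITS < p && p < 3 * LIMB_BITS)  return "mpfr_add1sp3";
  return "generic mpfr_add1sp code";
}

int main (void)
{
  static const unsigned long ps[] = { 63, 64, 65, 127, 128, 129, 191, 192 };
  unsigned i;
  for (i = 0; i < sizeof ps / sizeof ps[0]; i++)
    printf ("p = %3lu -> %s\n", ps[i], dispatch (ps[i]));
  return 0;
}

Note that p = 128 and p = 192 print "generic mpfr_add1sp code": the specialized functions cover only p = GMP_NUMB_BITS and the open intervals between multiples of the limb size.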