diff options
author | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 08:38:17 +0000 |
---|---|---|
committer | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 08:38:17 +0000 |
commit | 87ff38458263c9a9ed79a7ebd547fd32a66ae843 (patch) | |
tree | c8f4ed14e7e8d301fb3e00ba8ad43412733f2c5f /src/add1sp.c | |
parent | 0fb11bc4c54b2428a02016f68eae553c089f95ae (diff) | |
download | mpfr-87ff38458263c9a9ed79a7ebd547fd32a66ae843.tar.gz |
Merged r11198-11280 from the trunk; no conflicts but additional changes:
* About r11271 (src/add1sp.c), which introduces new special code
(function mpfr_add1sp1n), handle MPFR_RNDF in the same way as done
in similar existing special code (mpfr_add1sp1 and mpfr_add1sp2).
* In mpfr_add1sp3, do the same thing (this should have been done in
r11172, where this function was introduced via a merge).
* About r11279 (src/sub1sp.c, tests/tsub1sp.c), which introduces new
special code (function mpfr_sub1sp1n), do the same thing.
In tests/tsub1sp.c, s/RND_LOOP/RND_LOOP_NO_RNDF/ as usual to avoid
a failure.
* Note: concerning mpfr_sub1sp3, RNDF support was added at the same
time as the merge in r11179.
* Some style changes related to RNDF, in particular for consistency.
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/branches/faithful@11455 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/add1sp.c')
-rw-r--r-- | src/add1sp.c | 124 |
1 file changed, 118 insertions, 6 deletions
diff --git a/src/add1sp.c b/src/add1sp.c index 90b53ed28..be0e5529a 100644 --- a/src/add1sp.c +++ b/src/add1sp.c @@ -123,10 +123,12 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, if (bx == cx) { - /* TODO: a0 = (bp[0] >> 1) + (cp[0] >> 1) is probably better - (no long constant to load in a register). */ - /* since bp[0], cp[0] >= MPFR_LIMB_HIGHBIT, a carry always occurs */ - a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1); + /* The following line is probably better than + a0 = MPFR_LIMB_HIGHBIT | ((bp[0] + cp[0]) >> 1); + as it has less dependency and doesn't need a long constant on some + processors. On ARM, it can also probably benefit from shift-and-op + in a better way. Timings cannot be conclusive. */ + a0 = (bp[0] >> 1) + (cp[0] >> 1); bx ++; rb = a0 & (MPFR_LIMB_ONE << (sh - 1)); ap[0] = a0 ^ rb; @@ -194,7 +196,7 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); MPFR_SET_EXP (a, bx); - if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF)) + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET(0); else if (rnd_mode == MPFR_RNDN) { @@ -227,6 +229,111 @@ mpfr_add1sp1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, } } +/* same as mpfr_add1sp, but for p = GMP_NUMB_BITS */ +static int +mpfr_add1sp1n (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) +{ + mpfr_exp_t bx = MPFR_GET_EXP (b); + mpfr_exp_t cx = MPFR_GET_EXP (c); + mp_limb_t *ap = MPFR_MANT(a); + mp_limb_t *bp = MPFR_MANT(b); + mp_limb_t *cp = MPFR_MANT(c); + mp_limb_t rb; /* round bit */ + mp_limb_t sb; /* sticky bit */ + mp_limb_t a0; /* to store the result */ + mpfr_uexp_t d; + + MPFR_ASSERTD(MPFR_PREC (a) == GMP_NUMB_BITS); + MPFR_ASSERTD(MPFR_PREC (b) == GMP_NUMB_BITS); + MPFR_ASSERTD(MPFR_PREC (c) == GMP_NUMB_BITS); + + if (bx == cx) + { + a0 = bp[0] + cp[0]; + rb = a0 & MPFR_LIMB_ONE; + ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1); + bx ++; + sb = 0; /* 
since b + c fits on p+1 bits, the sticky bit is zero */ + } + else if (bx > cx) + { + BGreater1: + d = (mpfr_uexp_t) bx - cx; + if (d < GMP_NUMB_BITS) /* 1 <= d < GMP_NUMB_BITS */ + { + a0 = bp[0] + (cp[0] >> d); + sb = cp[0] << (GMP_NUMB_BITS - d); /* bits from cp[-1] after shift */ + if (a0 < bp[0]) /* carry */ + { + ap[0] = MPFR_LIMB_HIGHBIT | (a0 >> 1); + rb = a0 & 1; + bx ++; + } + else /* no carry */ + { + ap[0] = a0; + rb = sb & MPFR_LIMB_HIGHBIT; + sb &= ~MPFR_LIMB_HIGHBIT; + } + } + else /* d >= GMP_NUMB_BITS */ + { + sb = d != GMP_NUMB_BITS || cp[0] != MPFR_LIMB_HIGHBIT; + ap[0] = bp[0]; + rb = d == GMP_NUMB_BITS; + } + } + else /* bx < cx: swap b and c */ + { + mpfr_exp_t tx; + mp_limb_t *tp; + tx = bx; bx = cx; cx = tx; + tp = bp; bp = cp; cp = tp; + goto BGreater1; + } + + /* Note: we could keep the output significand in a0 for the rounding, + and only store it in ap[0] at the very end, but this seems slower + on average (but better for the worst case). */ + + /* now perform rounding */ + if (MPFR_UNLIKELY(bx > __gmpfr_emax)) + return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); + + MPFR_SET_EXP (a, bx); + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) + MPFR_RET(0); + else if (rnd_mode == MPFR_RNDN) + { + /* the condition below should be rb == 0 || (rb != 0 && ...), but this + is equivalent to rb == 0 || (...) 
*/ + if (rb == 0 || (sb == 0 && (ap[0] & MPFR_LIMB_ONE) == 0)) + goto truncate; + else + goto add_one_ulp; + } + else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(a))) + { + truncate: + MPFR_RET(-MPFR_SIGN(a)); + } + else /* round away from zero */ + { + add_one_ulp: + ap[0] += MPFR_LIMB_ONE; + if (MPFR_UNLIKELY(ap[0] == 0)) + { + ap[0] = MPFR_LIMB_HIGHBIT; + /* no need to have MPFR_LIKELY here, since we are in a rare branch */ + if (bx + 1 <= __gmpfr_emax) + MPFR_SET_EXP (a, bx + 1); + else /* overflow */ + return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); + } + MPFR_RET(MPFR_SIGN(a)); + } +} + /* same as mpfr_add1sp, but for GMP_NUMB_BITS < p < 2*GMP_NUMB_BITS */ static int mpfr_add1sp2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, @@ -474,7 +581,7 @@ mpfr_add1sp3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode, return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); MPFR_SET_EXP (a, bx); - if (rb == 0 && sb == 0) + if ((rb == 0 && sb == 0) || rnd_mode == MPFR_RNDF) MPFR_RET(0); else if (rnd_mode == MPFR_RNDN) { @@ -541,6 +648,11 @@ mpfr_add1sp (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) if (GMP_NUMB_BITS < p && p < 2 * GMP_NUMB_BITS) return mpfr_add1sp2 (a, b, c, rnd_mode, p); + /* we put this test after the mpfr_add1sp2() call, to avoid slowing down + the more frequent case GMP_NUMB_BITS < p < 2 * GMP_NUMB_BITS */ + if (p == GMP_NUMB_BITS) + return mpfr_add1sp1n (a, b, c, rnd_mode); + if (2 * GMP_NUMB_BITS < p && p < 3 * GMP_NUMB_BITS) return mpfr_add1sp3 (a, b, c, rnd_mode, p); |