[src/fmma.c] speedup of mpfr_fmma and mpfr_fmms

git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11332 280ebfd0-de03-0410-8827-d642c229c3f4
author: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-02-21 14:06:56 +0000
committer: zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4> 2017-02-21 14:06:56 +0000
commit: a1284ad4d015f28a2f37d284df00c7374aec802c (patch)
tree: 7d262b55646cf2132fa94f48ca02fb706069243e
parent: 4867ceee9b32a4c46091b347764f28bdda693ef0 (diff)
download: mpfr-a1284ad4d015f28a2f37d284df00c7374aec802c.tar.gz
3 files changed, 130 insertions, 83 deletions
diff --git a/src/fmma.c b/src/fmma.c
index 9198ccefa..6168c2af0 100644
--- a/src/fmma.c
+++ b/src/fmma.c
@@ -27,8 +27,10 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
                mpfr_srcptr d, mpfr_rnd_t rnd, int neg)
 {
   mpfr_ubf_t u, v;
+  mpfr_t zz;
+  mpfr_prec_t prec_z = MPFR_PREC(z);
   mp_size_t un, vn;
-  mpfr_limb_ptr up, vp;
+  mpfr_limb_ptr up, vp, zp;
   int inex;
   MPFR_TMP_DECL(marker);
 
@@ -52,7 +54,17 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
   mpfr_ubf_mul_exact (v, c, d);
   if (neg)
     MPFR_CHANGE_SIGN (v);
-  inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
+  if (prec_z == MPFR_PREC(a) && prec_z == MPFR_PREC(b) &&
+      prec_z == MPFR_PREC(c) && prec_z == MPFR_PREC(d) &&
+      un == MPFR_PREC2LIMBS(2 * prec_z))
+    {
+      MPFR_TMP_INIT (zp, zz, 2 * prec_z, un);
+      MPFR_PREC(u) = MPFR_PREC(v) = 2 * prec_z;
+      inex = mpfr_add (zz, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
+      inex = mpfr_set_1_2 (z, zz, rnd, inex);
+    }
+  else
+    inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
 
   MPFR_UBF_CLEAR_EXP (u);
   MPFR_UBF_CLEAR_EXP (v);
diff --git a/src/set.c b/src/set.c
index cb3e8f204..b2f9c3e62 100644
--- a/src/set.c
+++ b/src/set.c
@@ -80,9 +80,15 @@ mpfr_abs (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
   return mpfr_set4 (a, b, rnd_mode, MPFR_SIGN_POS);
 }
 
-/* Round (u, inex) into s with rounding mode rnd,
-   where inex is the ternary value associated to u with the same rounding mode.
-   Assumes PREC(u) = 2*PREC(s), and PREC(s) < GMP_NUMB_LIMBS. */
+/* Round (u, inex) into s with rounding mode rnd, where inex is the ternary
+   value associated to u with the *same* rounding mode.
+   Assumes PREC(u) = 2*PREC(s).
+   The main algorithm is the following:
+   rnd=RNDZ: inex2 = mpfr_set (s, u, rnd_mode); return inex2 | inex;
+             (a negative value, if any, is preserved in inex2 | inex)
+   rnd=RNDA: idem
+   rnd=RNDN: inex2 = mpfr_set (s, u, rnd_mode);
+             if (inex2) return inex2; else return inex; */
 int
 mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex)
 {
@@ -92,89 +98,102 @@ mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex)
   mp_limb_t *sp = MPFR_MANT(s);
   mp_limb_t *up = MPFR_MANT(u);
   mp_limb_t mask;
-  int abs_inex;
+  int inex2;
 
-  MPFR_ASSERTD(MPFR_PREC(s) < GMP_NUMB_BITS);
-  MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s));
+  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
+    {
+      mpfr_set (s, u, rnd_mode);
+      return inex;
+    }
 
-  mask = MPFR_LIMB_MASK(sh);
+  MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s));
 
-  if (MPFR_PREC(u) <= GMP_NUMB_BITS)
+  if (MPFR_PREC(s) < GMP_NUMB_BITS)
     {
-      mp_limb_t u0 = up[0];
+      mask = MPFR_LIMB_MASK(sh);
 
-      /* it suffices to round (u0, inex) */
-      rb = u0 & (MPFR_LIMB_ONE << (sh - 1));
-      sb = (u0 & mask) ^ rb;
-      sp[0] = u0 & ~mask;
-    }
-  else
-    {
-      mp_limb_t u1 = up[1];
+      if (MPFR_PREC(u) <= GMP_NUMB_BITS)
+        {
+          mp_limb_t u0 = up[0];
 
-      /* we need to round (u1, u0, inex) */
-      mask = MPFR_LIMB_MASK(sh);
-      rb = u1 & (MPFR_LIMB_ONE << (sh - 1));
-      sb = ((u1 & mask) ^ rb) | up[0];
-      sp[0] = u1 & ~mask;
-    }
+          /* it suffices to round (u0, inex) */
+          rb = u0 & (MPFR_LIMB_ONE << (sh - 1));
+          sb = (u0 & mask) ^ rb;
+          sp[0] = u0 & ~mask;
+        }
+      else
+        {
+          mp_limb_t u1 = up[1];
+          
+          /* we need to round (u1, u0, inex) */
+          mask = MPFR_LIMB_MASK(sh);
+          rb = u1 & (MPFR_LIMB_ONE << (sh - 1));
+          sb = ((u1 & mask) ^ rb) | up[0];
+          sp[0] = u1 & ~mask;
+        }
 
-  abs_inex = inex * MPFR_SIGN(u);
-  MPFR_SIGN(s) = MPFR_SIGN(u);
-  MPFR_EXP(s) = MPFR_EXP(u);
+      inex2 = inex * MPFR_SIGN(u);
+      MPFR_SIGN(s) = MPFR_SIGN(u);
+      MPFR_EXP(s) = MPFR_EXP(u);
 
-  /* in case abs_inex > 0, the value of u is rounded away,
-     thus we need to subtract something from (u0, rb, sb):
-     (a) if sb is not zero, since the subtracted value is < 1, we can leave
+      /* in case inex2 > 0, the value of u is rounded away,
+         thus we need to subtract something from (u0, rb, sb):
+         (a) if sb is not zero, since the subtracted value is < 1, we can leave
          sb as it is;
-     (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1
-     (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */
-  if (abs_inex > 0)
-    {
-      if (rb && sb == 0)
+         (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1
+         (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */
+      if (inex2 > 0)
         {
-          rb = 0;
-          sb = 1;
+          if (rb && sb == 0)
+            {
+              rb = 0;
+              sb = 1;
+            }
         }
-    }
-  else /* abs_inex <= 0 */
-    sb |= inex;
+      else /* inex2 <= 0 */
+        sb |= inex;
 
-  /* now rb, sb are the correct round and sticky bits, together with the value
-     of sp[0], except possibly in the case rb = sb = 0 and abs_inex > 0 */
-  if (rb == 0 && sb == 0)
-    {
-      if (abs_inex <= 0)
-        MPFR_RET(0);
-      else /* abs_inex > 0 can only occur for RNDN and RNDA:
-              RNDN: return sp[0] and inex
-              RNDA: return sp[0] and inex */
-        MPFR_RET(inex);
-    }
-  else if (rnd_mode == MPFR_RNDN)
-    {
-      if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0))
-        goto truncate;
-      else
-        goto add_one_ulp;
-    }
-  else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s)))
-    {
-    truncate:
-      MPFR_RET(-MPFR_SIGN(s));
-    }
-  else /* round away from zero */
-    {
-    add_one_ulp:
-      sp[0] += MPFR_LIMB_ONE << sh;
-      if (MPFR_UNLIKELY(sp[0] == 0))
+      /* now rb, sb are the round and sticky bits, together with the value of
+         sp[0], except possibly in the case rb = sb = 0 and inex2 > 0 */
+      if (rb == 0 && sb == 0)
+        {
+          if (inex2 <= 0)
+            MPFR_RET(0);
+          else /* inex2 > 0 can only occur for RNDN and RNDA:
+                  RNDN: return sp[0] and inex
+                  RNDA: return sp[0] and inex */
+            MPFR_RET(inex);
+        }
+      else if (rnd_mode == MPFR_RNDN)
+        {
+          if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+            goto truncate;
+          else
+            goto add_one_ulp;
+        }
+      else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s)))
+        {
+        truncate:
+          MPFR_RET(-MPFR_SIGN(s));
+        }
+      else /* round away from zero */
         {
-          sp[0] = MPFR_LIMB_HIGHBIT;
-          if (MPFR_EXP(s) + 1 <= __gmpfr_emax)
-            MPFR_SET_EXP (s, MPFR_EXP(s) + 1);
-          else /* overflow */
-            return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s));
+        add_one_ulp:
+          sp[0] += MPFR_LIMB_ONE << sh;
+          if (MPFR_UNLIKELY(sp[0] == 0))
+            {
+              sp[0] = MPFR_LIMB_HIGHBIT;
+              if (MPFR_EXP(s) + 1 <= __gmpfr_emax)
+                MPFR_SET_EXP (s, MPFR_EXP(s) + 1);
+              else /* overflow */
+                return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s));
+            }
+          MPFR_RET(MPFR_SIGN(s));
         }
-      MPFR_RET(MPFR_SIGN(s));
     }
+
+  /* general case PREC(s) >= GMP_NUMB_BITS */
+  inex2 = mpfr_set (s, u, rnd_mode);
+  return (rnd_mode != MPFR_RNDN) ? inex | inex2
+    : (inex2) ? inex2 : inex;
 }
diff --git a/src/ubf.c b/src/ubf.c
index 78eb90e37..efee51e7d 100644
--- a/src/ubf.c
+++ b/src/ubf.c
@@ -20,6 +20,7 @@ along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
 
+#define MPFR_NEED_LONGLONG_H
 #include "mpfr-impl.h"
 
 /* Note: In MPFR math functions, even if UBF code is not called first,
@@ -128,17 +129,32 @@ mpfr_ubf_mul_exact (mpfr_ubf_ptr a, mpfr_srcptr b, mpfr_srcptr c)
 
       ap = MPFR_MANT (a);
 
-      u = (bn >= cn) ?
-        mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) :
-        mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn);
-      if (MPFR_UNLIKELY (MPFR_LIMB_MSB (u) == 0))
+      if (bn == 1 && cn == 1)
         {
-          m = 1;
-          MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1));
-          MPFR_ASSERTD (v == 0);
+          umul_ppmm (ap[1], ap[0], MPFR_MANT(b)[0], MPFR_MANT(c)[0]);
+          if (ap[1] & MPFR_LIMB_HIGHBIT)
+            m = 0;
+          else
+            {
+              ap[1] = (ap[1] << 1) | (ap[0] >> (GMP_NUMB_BITS - 1));
+              ap[0] = ap[0] << 1;
+              m = 1;
+            }
         }
       else
-        m = 0;
+        {
+          u = (bn >= cn) ?
+            mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) :
+            mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn);
+          if (MPFR_LIMB_MSB (u) == 0)
+            {
+              m = 1;
+              MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1));
+              MPFR_ASSERTD (v == 0);
+            }
+          else
+            m = 0;
+        }
 
       if (! MPFR_IS_UBF (b) && ! MPFR_IS_UBF (c) &&
           (e = MPFR_GET_EXP (b) + MPFR_GET_EXP (c) - m,
author	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2017-02-21 14:06:56 +0000
committer	zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>	2017-02-21 14:06:56 +0000
commit	a1284ad4d015f28a2f37d284df00c7374aec802c (patch)
tree	7d262b55646cf2132fa94f48ca02fb706069243e
parent	4867ceee9b32a4c46091b347764f28bdda693ef0 (diff)
download	mpfr-a1284ad4d015f28a2f37d284df00c7374aec802c.tar.gz