summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>    2017-02-21 14:06:56 +0000
committer zimmerma <zimmerma@280ebfd0-de03-0410-8827-d642c229c3f4>    2017-02-21 14:06:56 +0000
commit    a1284ad4d015f28a2f37d284df00c7374aec802c (patch)
tree      7d262b55646cf2132fa94f48ca02fb706069243e
parent    4867ceee9b32a4c46091b347764f28bdda693ef0 (diff)
download  mpfr-a1284ad4d015f28a2f37d284df00c7374aec802c.tar.gz
[src/fmma.c] speedup of mpfr_fmma and mpfr_fmms
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/trunk@11332 280ebfd0-de03-0410-8827-d642c229c3f4
-rw-r--r--    src/fmma.c    16
-rw-r--r--    src/set.c     165
-rw-r--r--    src/ubf.c     32
3 files changed, 130 insertions(+), 83 deletions(-)
diff --git a/src/fmma.c b/src/fmma.c
index 9198ccefa..6168c2af0 100644
--- a/src/fmma.c
+++ b/src/fmma.c
@@ -27,8 +27,10 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
mpfr_srcptr d, mpfr_rnd_t rnd, int neg)
{
mpfr_ubf_t u, v;
+ mpfr_t zz;
+ mpfr_prec_t prec_z = MPFR_PREC(z);
mp_size_t un, vn;
- mpfr_limb_ptr up, vp;
+ mpfr_limb_ptr up, vp, zp;
int inex;
MPFR_TMP_DECL(marker);
@@ -52,7 +54,17 @@ mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c,
mpfr_ubf_mul_exact (v, c, d);
if (neg)
MPFR_CHANGE_SIGN (v);
- inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
+ if (prec_z == MPFR_PREC(a) && prec_z == MPFR_PREC(b) &&
+ prec_z == MPFR_PREC(c) && prec_z == MPFR_PREC(d) &&
+ un == MPFR_PREC2LIMBS(2 * prec_z))
+ {
+ MPFR_TMP_INIT (zp, zz, 2 * prec_z, un);
+ MPFR_PREC(u) = MPFR_PREC(v) = 2 * prec_z;
+ inex = mpfr_add (zz, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
+ inex = mpfr_set_1_2 (z, zz, rnd, inex);
+ }
+ else
+ inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd);
MPFR_UBF_CLEAR_EXP (u);
MPFR_UBF_CLEAR_EXP (v);
diff --git a/src/set.c b/src/set.c
index cb3e8f204..b2f9c3e62 100644
--- a/src/set.c
+++ b/src/set.c
@@ -80,9 +80,15 @@ mpfr_abs (mpfr_ptr a, mpfr_srcptr b, mpfr_rnd_t rnd_mode)
return mpfr_set4 (a, b, rnd_mode, MPFR_SIGN_POS);
}
-/* Round (u, inex) into s with rounding mode rnd,
- where inex is the ternary value associated to u with the same rounding mode.
- Assumes PREC(u) = 2*PREC(s), and PREC(s) < GMP_NUMB_LIMBS. */
+/* Round (u, inex) into s with rounding mode rnd, where inex is the ternary
+ value associated to u with the *same* rounding mode.
+ Assumes PREC(u) = 2*PREC(s).
+ The main algorithm is the following:
+ rnd=RNDZ: inex2 = mpfr_set (s, u, rnd_mode); return inex2 | inex;
+ (a negative value, if any, is preserved in inex2 | inex)
+ rnd=RNDA: idem
+ rnd=RNDN: inex2 = mpfr_set (s, u, rnd_mode);
+ if (inex2) return inex2; else return inex; */
int
mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex)
{
@@ -92,89 +98,102 @@ mpfr_set_1_2 (mpfr_ptr s, mpfr_srcptr u, mpfr_rnd_t rnd_mode, int inex)
mp_limb_t *sp = MPFR_MANT(s);
mp_limb_t *up = MPFR_MANT(u);
mp_limb_t mask;
- int abs_inex;
+ int inex2;
- MPFR_ASSERTD(MPFR_PREC(s) < GMP_NUMB_BITS);
- MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s));
+ if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
+ {
+ mpfr_set (s, u, rnd_mode);
+ return inex;
+ }
- mask = MPFR_LIMB_MASK(sh);
+ MPFR_ASSERTD(MPFR_PREC(u) == 2 * MPFR_PREC(s));
- if (MPFR_PREC(u) <= GMP_NUMB_BITS)
+ if (MPFR_PREC(s) < GMP_NUMB_BITS)
{
- mp_limb_t u0 = up[0];
+ mask = MPFR_LIMB_MASK(sh);
- /* it suffices to round (u0, inex) */
- rb = u0 & (MPFR_LIMB_ONE << (sh - 1));
- sb = (u0 & mask) ^ rb;
- sp[0] = u0 & ~mask;
- }
- else
- {
- mp_limb_t u1 = up[1];
+ if (MPFR_PREC(u) <= GMP_NUMB_BITS)
+ {
+ mp_limb_t u0 = up[0];
- /* we need to round (u1, u0, inex) */
- mask = MPFR_LIMB_MASK(sh);
- rb = u1 & (MPFR_LIMB_ONE << (sh - 1));
- sb = ((u1 & mask) ^ rb) | up[0];
- sp[0] = u1 & ~mask;
- }
+ /* it suffices to round (u0, inex) */
+ rb = u0 & (MPFR_LIMB_ONE << (sh - 1));
+ sb = (u0 & mask) ^ rb;
+ sp[0] = u0 & ~mask;
+ }
+ else
+ {
+ mp_limb_t u1 = up[1];
+
+ /* we need to round (u1, u0, inex) */
+ mask = MPFR_LIMB_MASK(sh);
+ rb = u1 & (MPFR_LIMB_ONE << (sh - 1));
+ sb = ((u1 & mask) ^ rb) | up[0];
+ sp[0] = u1 & ~mask;
+ }
- abs_inex = inex * MPFR_SIGN(u);
- MPFR_SIGN(s) = MPFR_SIGN(u);
- MPFR_EXP(s) = MPFR_EXP(u);
+ inex2 = inex * MPFR_SIGN(u);
+ MPFR_SIGN(s) = MPFR_SIGN(u);
+ MPFR_EXP(s) = MPFR_EXP(u);
- /* in case abs_inex > 0, the value of u is rounded away,
- thus we need to subtract something from (u0, rb, sb):
- (a) if sb is not zero, since the subtracted value is < 1, we can leave
+ /* in case inex2 > 0, the value of u is rounded away,
+ thus we need to subtract something from (u0, rb, sb):
+ (a) if sb is not zero, since the subtracted value is < 1, we can leave
sb as it is;
- (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1
- (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */
- if (abs_inex > 0)
- {
- if (rb && sb == 0)
+ (b) if rb <> 0 and sb = 0: change to rb = 0 and sb = 1
+ (c) if rb = sb = 0: change to rb = 1 and sb = 1, and subtract 1 */
+ if (inex2 > 0)
{
- rb = 0;
- sb = 1;
+ if (rb && sb == 0)
+ {
+ rb = 0;
+ sb = 1;
+ }
}
- }
- else /* abs_inex <= 0 */
- sb |= inex;
+ else /* inex2 <= 0 */
+ sb |= inex;
- /* now rb, sb are the correct round and sticky bits, together with the value
- of sp[0], except possibly in the case rb = sb = 0 and abs_inex > 0 */
- if (rb == 0 && sb == 0)
- {
- if (abs_inex <= 0)
- MPFR_RET(0);
- else /* abs_inex > 0 can only occur for RNDN and RNDA:
- RNDN: return sp[0] and inex
- RNDA: return sp[0] and inex */
- MPFR_RET(inex);
- }
- else if (rnd_mode == MPFR_RNDN)
- {
- if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0))
- goto truncate;
- else
- goto add_one_ulp;
- }
- else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s)))
- {
- truncate:
- MPFR_RET(-MPFR_SIGN(s));
- }
- else /* round away from zero */
- {
- add_one_ulp:
- sp[0] += MPFR_LIMB_ONE << sh;
- if (MPFR_UNLIKELY(sp[0] == 0))
+ /* now rb, sb are the round and sticky bits, together with the value of
+ sp[0], except possibly in the case rb = sb = 0 and inex2 > 0 */
+ if (rb == 0 && sb == 0)
+ {
+ if (inex2 <= 0)
+ MPFR_RET(0);
+ else /* inex2 > 0 can only occur for RNDN and RNDA:
+ RNDN: return sp[0] and inex
+ RNDA: return sp[0] and inex */
+ MPFR_RET(inex);
+ }
+ else if (rnd_mode == MPFR_RNDN)
+ {
+ if (rb == 0 || (sb == 0 && (sp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+ goto truncate;
+ else
+ goto add_one_ulp;
+ }
+ else if (MPFR_IS_LIKE_RNDZ(rnd_mode, MPFR_IS_NEG(s)))
+ {
+ truncate:
+ MPFR_RET(-MPFR_SIGN(s));
+ }
+ else /* round away from zero */
{
- sp[0] = MPFR_LIMB_HIGHBIT;
- if (MPFR_EXP(s) + 1 <= __gmpfr_emax)
- MPFR_SET_EXP (s, MPFR_EXP(s) + 1);
- else /* overflow */
- return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s));
+ add_one_ulp:
+ sp[0] += MPFR_LIMB_ONE << sh;
+ if (MPFR_UNLIKELY(sp[0] == 0))
+ {
+ sp[0] = MPFR_LIMB_HIGHBIT;
+ if (MPFR_EXP(s) + 1 <= __gmpfr_emax)
+ MPFR_SET_EXP (s, MPFR_EXP(s) + 1);
+ else /* overflow */
+ return mpfr_overflow (s, rnd_mode, MPFR_SIGN(s));
+ }
+ MPFR_RET(MPFR_SIGN(s));
}
- MPFR_RET(MPFR_SIGN(s));
}
+
+ /* general case PREC(s) >= GMP_NUMB_BITS */
+ inex2 = mpfr_set (s, u, rnd_mode);
+ return (rnd_mode != MPFR_RNDN) ? inex | inex2
+ : (inex2) ? inex2 : inex;
}
diff --git a/src/ubf.c b/src/ubf.c
index 78eb90e37..efee51e7d 100644
--- a/src/ubf.c
+++ b/src/ubf.c
@@ -20,6 +20,7 @@ along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
+#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"
/* Note: In MPFR math functions, even if UBF code is not called first,
@@ -128,17 +129,32 @@ mpfr_ubf_mul_exact (mpfr_ubf_ptr a, mpfr_srcptr b, mpfr_srcptr c)
ap = MPFR_MANT (a);
- u = (bn >= cn) ?
- mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) :
- mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn);
- if (MPFR_UNLIKELY (MPFR_LIMB_MSB (u) == 0))
+ if (bn == 1 && cn == 1)
{
- m = 1;
- MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1));
- MPFR_ASSERTD (v == 0);
+ umul_ppmm (ap[1], ap[0], MPFR_MANT(b)[0], MPFR_MANT(c)[0]);
+ if (ap[1] & MPFR_LIMB_HIGHBIT)
+ m = 0;
+ else
+ {
+ ap[1] = (ap[1] << 1) | (ap[0] >> (GMP_NUMB_BITS - 1));
+ ap[0] = ap[0] << 1;
+ m = 1;
+ }
}
else
- m = 0;
+ {
+ u = (bn >= cn) ?
+ mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) :
+ mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn);
+ if (MPFR_LIMB_MSB (u) == 0)
+ {
+ m = 1;
+ MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1));
+ MPFR_ASSERTD (v == 0);
+ }
+ else
+ m = 0;
+ }
if (! MPFR_IS_UBF (b) && ! MPFR_IS_UBF (c) &&
(e = MPFR_GET_EXP (b) + MPFR_GET_EXP (c) - m,