diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 2 | ||||
-rw-r--r-- | src/add.c | 20 | ||||
-rw-r--r-- | src/add1.c | 29 | ||||
-rw-r--r-- | src/cmp2.c | 31 | ||||
-rw-r--r-- | src/fmma.c | 139 | ||||
-rw-r--r-- | src/mpfr-impl.h | 82 | ||||
-rw-r--r-- | src/mpfr.h | 87 | ||||
-rw-r--r-- | src/print_raw.c | 15 | ||||
-rw-r--r-- | src/sub.c | 24 | ||||
-rw-r--r-- | src/sub1.c | 117 | ||||
-rw-r--r-- | src/sum.c | 242 | ||||
-rw-r--r-- | src/ubf.c | 226 | ||||
-rw-r--r-- | src/uceil_log2.c | 10 | ||||
-rw-r--r-- | src/vasprintf.c | 18 |
14 files changed, 710 insertions, 332 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 3c80ea743..d05085a7e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -59,7 +59,7 @@ buildopt.c digamma.c bernoulli.c isregular.c set_flt.c get_flt.c \ scale2.c set_z_exp.c ai.c gammaonethird.c ieee_floats.h \ grandom.c fpif.c set_float128.c get_float128.c rndna.c nrandom.c \ random_deviate.h random_deviate.c erandom.c mpfr-mini-gmp.c \ -mpfr-mini-gmp.h fmma.c log_ui.c gamma_inc.c +mpfr-mini-gmp.h fmma.c log_ui.c gamma_inc.c ubf.c libmpfr_la_LIBADD = @LIBOBJS@ @@ -31,7 +31,7 @@ mpfr_add (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) mpfr_get_prec (c), mpfr_log_prec, c, rnd_mode), ("a[%Pu]=%.*Rg", mpfr_get_prec (a), mpfr_log_prec, a)); - if (MPFR_ARE_SINGULAR(b,c)) + if (MPFR_ARE_SINGULAR_OR_UBF (b, c)) { if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c)) { @@ -59,7 +59,7 @@ mpfr_add (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) MPFR_SET_SAME_SIGN(a, c); MPFR_RET(0); /* exact */ } - /* now either b or c is zero */ + /* now both b and c are finite numbers */ else if (MPFR_IS_ZERO(b)) { if (MPFR_IS_ZERO(c)) @@ -78,11 +78,23 @@ mpfr_add (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) } return mpfr_set (a, c, rnd_mode); } - else + else if (MPFR_IS_ZERO(c)) { - MPFR_ASSERTD(MPFR_IS_ZERO(c)); return mpfr_set (a, b, rnd_mode); } + else + { + MPFR_ASSERTD (MPFR_IS_PURE_UBF (b)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (c)); + /* mpfr_sub1sp and mpfr_add1sp are not intended to support UBF, + for which optimization is less important. */ + if (MPFR_SIGN(b) != MPFR_SIGN(c)) + return mpfr_sub1 (a, b, c, rnd_mode); + else if (MPFR_EXP_LESS_P (b, c)) + return mpfr_add1 (a, c, b, rnd_mode); + else + return mpfr_add1 (a, b, c, rnd_mode); + } } MPFR_ASSERTD (MPFR_IS_PURE_FP (b)); diff --git a/src/add1.c b/src/add1.c index 4ecb4e390..a3a7d4e85 100644 --- a/src/add1.c +++ b/src/add1.c @@ -30,13 +30,21 @@ mpfr_add1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) mp_limb_t *ap, *bp, *cp; mpfr_prec_t aq, bq, cq, aq2; mp_size_t an, bn, cn; - mpfr_exp_t difw, exp; + mpfr_exp_t difw, exp, diff_exp; int sh, rb, fb, inex; - mpfr_uexp_t diff_exp; MPFR_TMP_DECL(marker); - MPFR_ASSERTD(MPFR_IS_PURE_FP(b)); - MPFR_ASSERTD(MPFR_IS_PURE_FP(c)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (b)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (c)); + + if (MPFR_UNLIKELY (MPFR_IS_UBF (b))) + { + exp = mpfr_ubf_zexp2exp (MPFR_ZEXP (b)); + if (exp > __gmpfr_emax) + return mpfr_overflow (a, rnd_mode, MPFR_SIGN (b));; + } + else + exp = MPFR_GET_EXP (b); MPFR_TMP_MARK(marker); @@ -68,13 +76,18 @@ mpfr_add1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) MPN_COPY(cp, ap, cn); } - exp = MPFR_GET_EXP (b); MPFR_SET_SAME_SIGN(a, b); MPFR_UPDATE2_RND_MODE(rnd_mode, MPFR_SIGN(b)); /* now rnd_mode is either MPFR_RNDN, MPFR_RNDZ, MPFR_RNDA or MPFR_RNDF. */ - /* Note: exponents can be negative, but the unsigned subtraction is - a modular subtraction, so that one gets the correct result. */ - diff_exp = (mpfr_uexp_t) exp - MPFR_GET_EXP(c); + if (MPFR_UNLIKELY (MPFR_IS_UBF (c))) + { + MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX > MPFR_PREC_MAX); + diff_exp = mpfr_ubf_diff_exp (b, c); + } + else + diff_exp = exp - MPFR_GET_EXP (c); + + MPFR_ASSERTD (diff_exp >= 0); /* * 1. Compute the significant part A', the non-significant bits of A diff --git a/src/cmp2.c b/src/cmp2.c index c4eee67f7..9c649b822 100644 --- a/src/cmp2.c +++ b/src/cmp2.c @@ -38,6 +38,7 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) { mp_limb_t *bp, *cp, bb, cc = 0, lastc = 0, dif, high_dif = 0; mp_size_t bn, cn; + mpfr_exp_t sdiff_exp; mpfr_uexp_t diff_exp; mpfr_prec_t res = 0; int sign; @@ -48,13 +49,21 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) /* the cases b=0 or c=0 are also treated apart in agm and sub (which calls sub1) */ - MPFR_ASSERTD (MPFR_IS_PURE_FP(b)); - MPFR_ASSERTD (MPFR_IS_PURE_FP(c)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (b)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (c)); - if (MPFR_GET_EXP (b) >= MPFR_GET_EXP (c)) + sdiff_exp = MPFR_UNLIKELY (MPFR_IS_UBF (b) || MPFR_IS_UBF (c)) ? + mpfr_ubf_diff_exp (b, c) : MPFR_GET_EXP (b) - MPFR_GET_EXP (c); + + /* The returned result is restricted to [MPFR_EXP_MIN,MPFR_EXP_MAX], + which is the range of the mpfr_exp_t type. But under the condition + below, the value of cancel will not be affected. */ + MPFR_STAT_STATIC_ASSERT (MPFR_EXP_MAX > MPFR_PREC_MAX); + + if (sdiff_exp >= 0) { sign = 1; - diff_exp = (mpfr_uexp_t) MPFR_GET_EXP (b) - MPFR_GET_EXP (c); + diff_exp = sdiff_exp; bp = MPFR_MANT(b); cp = MPFR_MANT(c); @@ -85,7 +94,7 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) if (MPFR_UNLIKELY (cn < 0)) /* c discards exactly the upper part of b */ { - unsigned int z; + int z; MPFR_ASSERTD (bn >= 0); @@ -96,7 +105,7 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) res += GMP_NUMB_BITS; } - count_leading_zeros(z, bp[bn]); /* bp[bn] <> 0 */ + count_leading_zeros (z, bp[bn]); /* bp[bn] <> 0 */ *cancel = res + z; return sign; } @@ -118,7 +127,7 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) else /* MPFR_EXP(b) < MPFR_EXP(c) */ { sign = -1; - diff_exp = (mpfr_uexp_t) MPFR_GET_EXP (c) - MPFR_GET_EXP (b); + diff_exp = - (mpfr_uexp_t) sdiff_exp; bp = MPFR_MANT(c); cp = MPFR_MANT(b); @@ -152,6 +161,7 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) while (MPFR_UNLIKELY ((cn >= 0 || lastc != 0) && (high_dif == 0) && (dif == 1))) { /* dif=1 implies diff_exp = 0 or 1 */ + MPFR_ASSERTD (diff_exp <= 1); bb = (bn >= 0) ? bp[bn] : 0; cc = lastc; if (cn >= 0) @@ -160,8 +170,9 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) { cc += cp[cn]; } - else /* diff_exp = 1 */ + else { + MPFR_ASSERTD (diff_exp == 1); cc += cp[cn] >> 1; lastc = cp[cn] << (GMP_NUMB_BITS - 1); } @@ -188,9 +199,9 @@ mpfr_cmp2 (mpfr_srcptr b, mpfr_srcptr c, mpfr_prec_t *cancel) } else /* high_dif == 0 */ { - unsigned int z; + int z; - count_leading_zeros(z, dif); /* dif > 1 here */ + count_leading_zeros (z, dif); /* dif > 1 here */ res += z; if (MPFR_LIKELY(dif != (MPFR_LIMB_ONE << (GMP_NUMB_BITS - z - 1)))) { /* dif is not a power of two */ diff --git a/src/fmma.c b/src/fmma.c index 9f0af2faf..fbd94ff6d 100644 --- a/src/fmma.c +++ b/src/fmma.c @@ -23,103 +23,43 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., #include "mpfr-impl.h" static int -mpfr_fmma_slow (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, - mpfr_srcptr d, mpfr_rnd_t rnd) +mpfr_fmma_aux (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, + mpfr_srcptr d, mpfr_rnd_t rnd, int neg) { - mpfr_t ab, cd; + mpfr_ubf_t u, v; + mp_size_t un, vn; + mpfr_limb_ptr up, vp; int inex; + MPFR_TMP_DECL(marker); - mpfr_init2 (ab, MPFR_PREC(a) + MPFR_PREC(b)); - mpfr_init2 (cd, MPFR_PREC(c) + MPFR_PREC(d)); - /* FIXME: The following multiplications may overflow or underflow - (even more often with the fact that the exponent range is not - extended), in which case the result is not exact. This should - be solved with future unbounded floats. */ - mpfr_mul (ab, a, b, MPFR_RNDZ); /* exact */ - mpfr_mul (cd, c, d, MPFR_RNDZ); /* exact */ - inex = mpfr_add (z, ab, cd, rnd); - mpfr_clear (ab); - mpfr_clear (cd); - return inex; -} + MPFR_LOG_FUNC + (("a[%Pu]=%.*Rg b[%Pu]=%.*Rg c[%Pu]=%.*Rg d[%Pu]=%.*Rg rnd=%d neg=%d", + mpfr_get_prec (a), mpfr_log_prec, a, + mpfr_get_prec (b), mpfr_log_prec, b, + mpfr_get_prec (c), mpfr_log_prec, c, + mpfr_get_prec (d), mpfr_log_prec, d, rnd, neg), + ("z[%Pu]=%.*Rg inex=%d", + mpfr_get_prec (z), mpfr_log_prec, z, inex)); -/* z <- a*b + c*d */ -static int -mpfr_fmma_fast (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, - mpfr_srcptr d, mpfr_rnd_t rnd) -{ - /* Assumes that a, b, c, d are finite and non-zero; so any multiplication - of two of them yielding an infinity is an overflow, and a - multiplication yielding 0 is an underflow. - Assumes further that z is distinct from a, b, c, d. */ - - int inex; - mpfr_t u, v; - mp_size_t an, bn, cn, dn; - mpfr_limb_ptr up, vp; - MPFR_TMP_DECL(marker); - MPFR_SAVE_EXPO_DECL (expo); - - MPFR_TMP_MARK(marker); - MPFR_SAVE_EXPO_MARK (expo); - - /* u=a*b, v=c*d exactly */ - an = MPFR_LIMB_SIZE(a); - bn = MPFR_LIMB_SIZE(b); - cn = MPFR_LIMB_SIZE(c); - dn = MPFR_LIMB_SIZE(d); - MPFR_TMP_INIT (up, u, (an + bn) * GMP_NUMB_BITS, an + bn); - MPFR_TMP_INIT (vp, v, (cn + dn) * GMP_NUMB_BITS, cn + dn); - - /* u <- a*b */ - if (an >= bn) - mpn_mul (up, MPFR_MANT(a), an, MPFR_MANT(b), bn); - else - mpn_mul (up, MPFR_MANT(b), bn, MPFR_MANT(a), an); - if ((up[an + bn - 1] & MPFR_LIMB_HIGHBIT) == 0) - { - mpn_lshift (up, up, an + bn, 1); - /* EXP(a) and EXP(b) are in [1-2^(n-2), 2^(n-2)-1] where - mpfr_exp_t has is n-bit wide, thus EXP(a)+EXP(b) is in - [2-2^(n-1), 2^(n-1)-2]. We use the fact here that 2*MPFR_EMIN_MIN-1 - is a valid exponent (see mpfr-impl.h). However we don't use - MPFR_SET_EXP() which only allows the restricted exponent range - [1-2^(n-2), 2^(n-2)-1]. */ - MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b) - 1; - } - else - MPFR_EXP(u) = MPFR_EXP(a) + MPFR_EXP(b); - - /* v <- c*d */ - if (cn >= dn) - mpn_mul (vp, MPFR_MANT(c), cn, MPFR_MANT(d), dn); - else - mpn_mul (vp, MPFR_MANT(d), dn, MPFR_MANT(c), cn); - if ((vp[cn + dn - 1] & MPFR_LIMB_HIGHBIT) == 0) - { - mpn_lshift (vp, vp, cn + dn, 1); - MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d) - 1; - } - else - MPFR_EXP(v) = MPFR_EXP(c) + MPFR_EXP(d); - - MPFR_PREC(u) = (an + bn) * GMP_NUMB_BITS; - MPFR_PREC(v) = (cn + dn) * GMP_NUMB_BITS; - MPFR_SIGN(u) = MPFR_MULT_SIGN(MPFR_SIGN(a), MPFR_SIGN(b)); - MPFR_SIGN(v) = MPFR_MULT_SIGN(MPFR_SIGN(c), MPFR_SIGN(d)); - - /* tentatively compute z as u+v; here we need z to be distinct - from a, b, c, d to avoid losing the input values in case we - need to call mpfr_fmma_slow */ - /* FIXME: The above comment is no longer valid. Anyway, with - unbounded floats (based on an exact multiplication like above), - it will no longer be necessary to distinguish fast and slow. */ - inex = mpfr_add (z, u, v, rnd); - - MPFR_TMP_FREE(marker); - MPFR_SAVE_EXPO_FREE (expo); - - return mpfr_check_range (z, inex, rnd); + MPFR_TMP_MARK (marker); + + un = MPFR_LIMB_SIZE (a) + MPFR_LIMB_SIZE (b); + vn = MPFR_LIMB_SIZE (c) + MPFR_LIMB_SIZE (d); + MPFR_TMP_INIT (up, u, (mpfr_prec_t) un * GMP_NUMB_BITS, un); + MPFR_TMP_INIT (vp, v, (mpfr_prec_t) vn * GMP_NUMB_BITS, vn); + + mpfr_ubf_mul_exact (u, a, b); + mpfr_ubf_mul_exact (v, c, d); + if (neg) + MPFR_CHANGE_SIGN (v); + inex = mpfr_add (z, (mpfr_srcptr) u, (mpfr_srcptr) v, rnd); + + MPFR_UBF_CLEAR_EXP (u); + MPFR_UBF_CLEAR_EXP (v); + + MPFR_TMP_FREE (marker); + + return inex; } /* z <- a*b + c*d */ @@ -127,13 +67,7 @@ int mpfr_fmma (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_srcptr d, mpfr_rnd_t rnd) { - mpfr_limb_ptr zp = MPFR_MANT(z); - - return (mpfr_regular_p (a) && mpfr_regular_p (b) && mpfr_regular_p (c) && - mpfr_regular_p (d) && zp != MPFR_MANT(a) && zp != MPFR_MANT(b) && - zp != MPFR_MANT(c) && zp != MPFR_MANT(d)) - ? mpfr_fmma_fast (z, a, b, c, d, rnd) - : mpfr_fmma_slow (z, a, b, c, d, rnd); + return mpfr_fmma_aux (z, a, b, c, d, rnd, 0); } /* z <- a*b - c*d */ @@ -141,8 +75,5 @@ int mpfr_fmms (mpfr_ptr z, mpfr_srcptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_srcptr d, mpfr_rnd_t rnd) { - mpfr_t minus_c; - - MPFR_ALIAS (minus_c, c, -MPFR_SIGN(c), MPFR_EXP(c)); - return mpfr_fmma (z, a, b, minus_c, d, rnd); + return mpfr_fmma_aux (z, a, b, c, d, rnd, 1); } diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h index 1d52197c8..430c30f11 100644 --- a/src/mpfr-impl.h +++ b/src/mpfr-impl.h @@ -820,11 +820,13 @@ union ieee_double_decimal64 { double d; _Decimal64 d64; }; #if _MPFR_EXP_FORMAT <= 3 typedef long int mpfr_eexp_t; +typedef unsigned long int mpfr_ueexp_t; # define mpfr_get_exp_t(x,r) mpfr_get_si((x),(r)) # define mpfr_set_exp_t(x,e,r) mpfr_set_si((x),(e),(r)) # define MPFR_EXP_FSPEC "l" #else typedef intmax_t mpfr_eexp_t; +typedef uintmax_t mpfr_ueexp_t; # define mpfr_get_exp_t(x,r) mpfr_get_sj((x),(r)) # define mpfr_set_exp_t(x,e,r) mpfr_set_sj((x),(e),(r)) # define MPFR_EXP_FSPEC "j" @@ -840,18 +842,17 @@ typedef intmax_t mpfr_eexp_t; /* Definition of the exponent limits for MPFR numbers. * These limits are chosen so that if e is such an exponent, then 2e-1 and - * 2e+1 are valid exponents. This is useful for intermediate computations, - * in particular the multiplication. We must have MPFR_EMIN_MIN >= 3-2^(n-2) - * = 3-MPFR_EXP_INVALID so that 2*MPFR_EMIN_MIN-1 > __MPFR_EXP_INF = 3-2^(n-1). + * 2e+1 are representable. This is useful for intermediate computations, + * in particular the multiplication. */ #undef MPFR_EMIN_MIN #undef MPFR_EMIN_MAX #undef MPFR_EMAX_MIN #undef MPFR_EMAX_MAX -#define MPFR_EMIN_MIN (3-MPFR_EXP_INVALID) -#define MPFR_EMIN_MAX (MPFR_EXP_INVALID-3) -#define MPFR_EMAX_MIN (3-MPFR_EXP_INVALID) -#define MPFR_EMAX_MAX (MPFR_EXP_INVALID-3) +#define MPFR_EMIN_MIN (1-MPFR_EXP_INVALID) +#define MPFR_EMIN_MAX (MPFR_EXP_INVALID-1) +#define MPFR_EMAX_MIN (1-MPFR_EXP_INVALID) +#define MPFR_EMAX_MAX (MPFR_EXP_INVALID-1) /* Use MPFR_GET_EXP and MPFR_SET_EXP instead of MPFR_EXP directly, unless when the exponent may be out-of-range, for instance when @@ -880,6 +881,10 @@ typedef intmax_t mpfr_eexp_t; # define MPFR_SET_INVALID_EXP(x) ((void) 0) #endif +#define MPFR_EXP_LESS_P(x,y) \ + (MPFR_UNLIKELY (MPFR_IS_UBF (x) || MPFR_IS_UBF (y)) ? \ + mpfr_ubf_exp_less_p (x, y) : MPFR_GET_EXP (x) < MPFR_GET_EXP (y)) + /****************************************************** ********* Singular values (NAN, INF, ZERO) ********* @@ -889,6 +894,7 @@ typedef intmax_t mpfr_eexp_t; # define MPFR_EXP_ZERO (MPFR_EXP_MIN+1) # define MPFR_EXP_NAN (MPFR_EXP_MIN+2) # define MPFR_EXP_INF (MPFR_EXP_MIN+3) +# define MPFR_EXP_UBF (MPFR_EXP_MIN+4) #define MPFR_IS_NAN(x) (MPFR_EXP(x) == MPFR_EXP_NAN) #define MPFR_SET_NAN(x) (MPFR_EXP(x) = MPFR_EXP_NAN) @@ -897,21 +903,34 @@ typedef intmax_t mpfr_eexp_t; #define MPFR_IS_ZERO(x) (MPFR_EXP(x) == MPFR_EXP_ZERO) #define MPFR_SET_ZERO(x) (MPFR_EXP(x) = MPFR_EXP_ZERO) #define MPFR_NOTZERO(x) (MPFR_EXP(x) != MPFR_EXP_ZERO) +#define MPFR_IS_UBF(x) (MPFR_EXP(x) == MPFR_EXP_UBF) +#define MPFR_SET_UBF(x) (MPFR_EXP(x) = MPFR_EXP_UBF) #define MPFR_IS_NORMALIZED(x) \ (MPFR_LIMB_MSB (MPFR_MANT(x)[MPFR_LAST_LIMB(x)]) != 0) #define MPFR_IS_FP(x) (!MPFR_IS_NAN(x) && !MPFR_IS_INF(x)) #define MPFR_IS_SINGULAR(x) (MPFR_EXP(x) <= MPFR_EXP_INF) +#define MPFR_IS_SINGULAR_OR_UBF(x) (MPFR_EXP(x) <= MPFR_EXP_UBF) #define MPFR_IS_PURE_FP(x) \ (!MPFR_IS_SINGULAR(x) && \ (MPFR_ASSERTD (MPFR_EXP (x) >= MPFR_EMIN_MIN && \ MPFR_EXP (x) <= MPFR_EMAX_MAX && \ MPFR_IS_NORMALIZED (x)), 1)) +#define MPFR_IS_PURE_UBF(x) \ + (!MPFR_IS_SINGULAR(x) && \ + (MPFR_ASSERTD ((MPFR_IS_UBF (x) || \ + (MPFR_EXP (x) >= MPFR_EMIN_MIN && \ + MPFR_EXP (x) <= MPFR_EMAX_MAX)) && \ + MPFR_IS_NORMALIZED (x)), 1)) #define MPFR_ARE_SINGULAR(x,y) \ (MPFR_UNLIKELY(MPFR_IS_SINGULAR(x)) || MPFR_UNLIKELY(MPFR_IS_SINGULAR(y))) +#define MPFR_ARE_SINGULAR_OR_UBF(x,y) \ + (MPFR_UNLIKELY(MPFR_IS_SINGULAR_OR_UBF(x)) || \ + MPFR_UNLIKELY(MPFR_IS_SINGULAR_OR_UBF(y))) + #define MPFR_IS_POWER_OF_2(x) \ (mpfr_cmp_ui_2exp ((x), 1, MPFR_GET_EXP (x) - 1) == 0) @@ -2237,4 +2256,53 @@ __MPFR_DECLSPEC extern int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2]; #endif /* MPFR_COV_CHECK */ +/****************************************************** + ***************** Unbounded Floats ***************** + ******************************************************/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/* An UBF is like a MPFR number, but with an additional mpz_t member, + which is assumed to be present (with a value in it) when the usual + exponent field has the value MPFR_EXP_UBF. The goal of this compatible + representation is to easily be able to support UBF in "normal" code + and hopefully avoid aliasing issues at the same time. And code that + accepts UBF in input should also accept mpfr_t as a consequence; this + makes mpfr_t to UBF conversion unnecessary. + When an input of a public function is a UBF, the semantic remains + internal to MPFR and can change in the future. + Note that functions used for logging need to support UBF (currently + done by printing that a number is a UBF, as it may be difficult to + do more without significant changes). */ + +typedef struct { + mpfr_prec_t _mpfr_prec; + mpfr_sign_t _mpfr_sign; + mpfr_exp_t _mpfr_exp; + mp_limb_t *_mpfr_d; + mpz_t _mpfr_zexp; +} __mpfr_ubf_struct; + +typedef __mpfr_ubf_struct mpfr_ubf_t[1]; +typedef __mpfr_ubf_struct *mpfr_ubf_ptr; + +__MPFR_DECLSPEC void mpfr_ubf_mul_exact (mpfr_ubf_ptr, + mpfr_srcptr, mpfr_srcptr); +__MPFR_DECLSPEC int mpfr_ubf_exp_less_p (mpfr_srcptr, mpfr_srcptr); +__MPFR_DECLSPEC mpfr_exp_t mpfr_ubf_zexp2exp (mpz_ptr); +__MPFR_DECLSPEC mpfr_exp_t mpfr_ubf_diff_exp (mpfr_srcptr, mpfr_srcptr); + +#if defined (__cplusplus) +} +#endif + +#define MPFR_ZEXP(x) \ + ((void) (x)->_mpfr_exp /* to check that x has a correct type */, \ + ((mpfr_ubf_ptr) (x))->_mpfr_zexp) + +#define MPFR_UBF_CLEAR_EXP(x) \ + ((void) (MPFR_IS_UBF (u) && (mpz_clear (MPFR_ZEXP (x)), 0))) + #endif /* __MPFR_IMPL_H__ */ diff --git a/src/mpfr.h b/src/mpfr.h index ca7ebbf8d..bc5802bdb 100644 --- a/src/mpfr.h +++ b/src/mpfr.h @@ -184,7 +184,7 @@ typedef uintmax_t mpfr_uexp_t; #endif /* Definition of the standard exponent limits */ -#define MPFR_EMAX_DEFAULT ((mpfr_exp_t) (((mpfr_ulong) 1 << 30) - 3)) +#define MPFR_EMAX_DEFAULT ((mpfr_exp_t) (((mpfr_ulong) 1 << 30) - 1)) #define MPFR_EMIN_DEFAULT (-(MPFR_EMAX_DEFAULT)) /* DON'T USE THIS! (For MPFR-public macros only, see below.) @@ -442,7 +442,7 @@ __MPFR_DECLSPEC int __MPFR_DECLSPEC int mpfr_set_si_2exp (mpfr_ptr, long, mpfr_exp_t, mpfr_rnd_t); __MPFR_DECLSPEC int - mpfr_set_ui_2exp (mpfr_ptr,unsigned long,mpfr_exp_t,mpfr_rnd_t); + mpfr_set_ui_2exp (mpfr_ptr, unsigned long, mpfr_exp_t, mpfr_rnd_t); #ifndef MPFR_USE_MINI_GMP /* mini-gmp does not provide mpq_t, we disable the following functions */ __MPFR_DECLSPEC int @@ -593,7 +593,7 @@ __MPFR_DECLSPEC int mpfr_div_d (mpfr_ptr, mpfr_srcptr, __MPFR_DECLSPEC int mpfr_d_div (mpfr_ptr, double, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_sqr (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_sqr (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_const_pi (mpfr_ptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_const_log2 (mpfr_ptr, mpfr_rnd_t); @@ -603,8 +603,8 @@ __MPFR_DECLSPEC int mpfr_const_catalan (mpfr_ptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_agm (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_log (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_log2 (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_log (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_log2 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_log10 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_log1p (mpfr_ptr, mpfr_srcptr, @@ -612,14 +612,14 @@ __MPFR_DECLSPEC int mpfr_log1p (mpfr_ptr, mpfr_srcptr, __MPFR_DECLSPEC int mpfr_log_ui (mpfr_ptr, unsigned long, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_exp (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_exp2 (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_exp (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_exp2 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_exp10 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_expm1 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_eint (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_li2 (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_eint (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_li2 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_cmp (mpfr_srcptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_cmp3 (mpfr_srcptr, mpfr_srcptr, int); @@ -651,7 +651,7 @@ __MPFR_DECLSPEC int mpfr_mul_2si (mpfr_ptr, mpfr_srcptr, __MPFR_DECLSPEC int mpfr_div_2si (mpfr_ptr, mpfr_srcptr, long, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_rint (mpfr_ptr,mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_rint (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_roundeven (mpfr_ptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_round (mpfr_ptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_trunc (mpfr_ptr, mpfr_srcptr); @@ -667,7 +667,7 @@ __MPFR_DECLSPEC int mpfr_rint_ceil (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_rint_floor (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_frac (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_frac (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_modf (mpfr_ptr, mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_remquo (mpfr_ptr, long*, mpfr_srcptr, @@ -685,7 +685,7 @@ __MPFR_DECLSPEC int mpfr_fits_uint_p (mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_fits_sint_p (mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_fits_ushort_p (mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_fits_sshort_p (mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_fits_uintmax_p (mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_fits_uintmax_p (mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_fits_intmax_p (mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC void mpfr_extract (mpz_ptr, mpfr_srcptr, @@ -705,51 +705,52 @@ __MPFR_DECLSPEC int mpfr_greaterequal_p (mpfr_srcptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_less_p (mpfr_srcptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_lessequal_p (mpfr_srcptr, mpfr_srcptr); -__MPFR_DECLSPEC int mpfr_lessgreater_p (mpfr_srcptr,mpfr_srcptr); +__MPFR_DECLSPEC int mpfr_lessgreater_p (mpfr_srcptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_equal_p (mpfr_srcptr, mpfr_srcptr); __MPFR_DECLSPEC int mpfr_unordered_p (mpfr_srcptr, mpfr_srcptr); -__MPFR_DECLSPEC int mpfr_atanh (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_acosh (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_asinh (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_cosh (mpfr_ptr,mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_sinh (mpfr_ptr,mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_tanh (mpfr_ptr,mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_atanh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_acosh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_asinh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_cosh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_sinh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_tanh (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_sinh_cosh (mpfr_ptr, mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_sech (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_csch (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_coth (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_sech (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_csch (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_coth (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_acos (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_asin (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_atan (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_sin (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_acos (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_asin (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_atan (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_sin (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_sin_cos (mpfr_ptr, mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_cos (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_tan (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_atan2 (mpfr_ptr,mpfr_srcptr,mpfr_srcptr, +__MPFR_DECLSPEC int mpfr_cos (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_tan (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_atan2 (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_sec (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_csc (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_cot (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_sec (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_csc (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_cot (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_hypot (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_erf (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_erfc (mpfr_ptr, mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_cbrt (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_root (mpfr_ptr,mpfr_srcptr,unsigned long,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_gamma (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_gamma_inc (mpfr_ptr,mpfr_srcptr, +__MPFR_DECLSPEC int mpfr_erf (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_erfc (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_cbrt (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_root (mpfr_ptr, mpfr_srcptr, unsigned long, + mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_gamma (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_gamma_inc (mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_lngamma (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_lgamma (mpfr_ptr,int*,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_digamma (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_zeta (mpfr_ptr,mpfr_srcptr,mpfr_rnd_t); -__MPFR_DECLSPEC int mpfr_zeta_ui (mpfr_ptr,unsigned long,mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_lngamma (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_lgamma (mpfr_ptr, int *, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_digamma (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_zeta (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); +__MPFR_DECLSPEC int mpfr_zeta_ui (mpfr_ptr, unsigned long, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_fac_ui (mpfr_ptr, unsigned long int, mpfr_rnd_t); __MPFR_DECLSPEC int mpfr_j0 (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); diff --git a/src/print_raw.c b/src/print_raw.c index af9c6c1b9..cd19ddee7 100644 --- a/src/print_raw.c +++ b/src/print_raw.c @@ -49,25 +49,22 @@ mpfr_fprint_binary (FILE *stream, mpfr_srcptr x) px = MPFR_PREC (x); fprintf (stream, "0."); - for (n = (px - 1) / GMP_NUMB_BITS; ; n--) + for (n = (px - 1) / GMP_NUMB_BITS; n >= 0; n--) { mp_limb_t wd, t; - MPFR_ASSERTN (n >= 0); wd = mx[n]; for (t = MPFR_LIMB_HIGHBIT; t != 0; t >>= 1) { putc ((wd & t) == 0 ? '0' : '1', stream); if (--px == 0) - { - mpfr_exp_t ex; - - ex = MPFR_EXP (x); - fprintf (stream, "E%" MPFR_EXP_FSPEC "d", (mpfr_eexp_t) ex); - return; - } + break; } } + if (MPFR_IS_UBF (x)) + gmp_fprintf (stream, "E%Zd", MPFR_ZEXP (x)); + else + fprintf (stream, "E%" MPFR_EXP_FSPEC "d", (mpfr_eexp_t) MPFR_EXP (x)); } } @@ -31,7 +31,7 @@ mpfr_sub (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) mpfr_get_prec (c), mpfr_log_prec, c, rnd_mode), ("a[%Pu]=%.*Rg", mpfr_get_prec (a), mpfr_log_prec, a)); - if (MPFR_ARE_SINGULAR (b,c)) + if (MPFR_ARE_SINGULAR_OR_UBF (b,c)) { if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c)) { @@ -72,11 +72,29 @@ mpfr_sub (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) else return mpfr_neg (a, c, rnd_mode); } - else + else if (MPFR_IS_ZERO (c)) { - MPFR_ASSERTD (MPFR_IS_ZERO (c)); return mpfr_set (a, b, rnd_mode); } + else + { + MPFR_ASSERTD (MPFR_IS_PURE_UBF (b)); + MPFR_ASSERTD (MPFR_IS_PURE_UBF (c)); + /* mpfr_sub1sp and mpfr_add1sp are not intended to support UBF, + for which optimization is less important. */ + if (MPFR_SIGN(b) == MPFR_SIGN(c)) + return mpfr_sub1 (a, b, c, rnd_mode); + else if (MPFR_EXP_LESS_P (b, c)) + { + int inexact; + rnd_mode = MPFR_INVERT_RND (rnd_mode); + inexact = mpfr_add1 (a, c, b, rnd_mode); + MPFR_CHANGE_SIGN (a); + return -inexact; + } + else + return mpfr_add1 (a, b, c, rnd_mode); + } } MPFR_ASSERTD (MPFR_IS_PURE_FP (b)); diff --git a/src/sub1.c b/src/sub1.c index 8dfda0256..e38eae63a 100644 --- a/src/sub1.c +++ b/src/sub1.c @@ -28,11 +28,13 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., a positive value otherwise. */ +/* TODO: check the code in case exp_b == MPFR_EXP_MAX. */ + int mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) { int sign; - mpfr_exp_t diff_exp; + mpfr_exp_t diff_exp, exp_b; mpfr_prec_t cancel, cancel1; mp_size_t cancel2, an, bn, cn, cn0; mp_limb_t *ap, *bp, *cp; @@ -87,7 +89,17 @@ mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) else MPFR_SET_SAME_SIGN (a,b); - diff_exp = MPFR_GET_EXP (b) - MPFR_GET_EXP (c); + if (MPFR_UNLIKELY (MPFR_IS_UBF (b) || MPFR_IS_UBF (c))) + { + exp_b = MPFR_IS_UBF (b) ? + mpfr_ubf_zexp2exp (MPFR_ZEXP (b)) : MPFR_GET_EXP (b); + diff_exp = mpfr_ubf_diff_exp (b, c); + } + else + { + exp_b = MPFR_GET_EXP (b); + diff_exp = exp_b - MPFR_GET_EXP (c); + } MPFR_ASSERTD (diff_exp >= 0); aq = MPFR_GET_PREC (a); @@ -99,6 +111,8 @@ mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) but it is more expensive and not very useful */ if (MPFR_UNLIKELY (MAX (aq, bq) + 2 <= diff_exp)) { + MPFR_LOG_MSG (("case c small\n", 0)); + /* Remember, we can't have an exact result! */ /* A.AAAAAAAAAAAAAAAAA = B.BBBBBBBBBBBBBBB @@ -110,55 +124,65 @@ mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) MPFR_SET_EXP (a, MPFR_GET_EXP (b)); MPFR_RNDRAW_EVEN (inexact, a, MPFR_MANT (b), bq, - rnd_mode, MPFR_SIGN (a), ++ MPFR_EXP (a)); + rnd_mode, MPFR_SIGN (a), + if (MPFR_EXP (a) != MPFR_EXP_MAX) + ++ MPFR_EXP (a)); + MPFR_LOG_MSG (("inexact=%d\n", inexact)); if (inexact == 0) { - /* a = b (Exact) - But we know it isn't (Since we have to remove `c') - So if we round to Zero, we have to remove one ulp. - Otherwise the result is correctly rounded. */ - /* An overflow is not possible. */ - MPFR_ASSERTD (MPFR_EXP (a) <= __gmpfr_emax); - if (MPFR_IS_LIKE_RNDZ (rnd_mode, MPFR_IS_NEG (a))) + /* a = b, but the exact value of b - c is a bit below. Then, + except for directed rounding similar to toward zero and + before overflow checking: a is the correctly rounded value + and since |b| - |c| < |a|, the ternary value value is given + by the sign of a. */ + if (! MPFR_IS_LIKE_RNDZ (rnd_mode, MPFR_IS_NEG (a))) { - mpfr_nexttozero (a); - MPFR_RET (- MPFR_INT_SIGN (a)); + inexact = MPFR_INT_SIGN (a); + goto check_overflow; } - MPFR_RET (MPFR_INT_SIGN (a)); } - else + else /* inexact != 0 */ { /* A.AAAAAAAAAAAAAA = B.BBBBBBBBBBBBBBB - C.CCCCCCCCCCCCC */ - /* It isn't exact so Prec(b) > Prec(a) and the last - Prec(b)-Prec(a) bits of `b' are not zeros. - Which means that removing c from b can't generate a carry - except in case of even rounding. - In all other cases the result and the inexact flag should be - correct (We can't have an exact result). - In case of EVEN rounding: + /* It isn't exact, so PREC(b) > PREC(a) and the last + PREC(b)-PREC(a) bits of b are not all zeros. + Subtracting c from b will not have an effect on the rounding + except in case of a midpoint in the round-to-nearest mode, + when the even rounding was done away from zero instead of + toward zero. + In case of even rounding: 1.BBBBBBBBBBBBBx10 - 1.CCCCCCCCCCCC - = 1.BBBBBBBBBBBBBx01 Rounded to Prec(b) - = 1.BBBBBBBBBBBBBx Nearest / Rounded to Prec(a) + = 1.BBBBBBBBBBBBBx01 Rounded to PREC(b) + = 1.BBBBBBBBBBBBBx Nearest / Rounded to PREC(a) Set gives: 1.BBBBBBBBBBBBB0 if inexact == EVEN_INEX (x == 0) 1.BBBBBBBBBBBBB1+1 if inexact == -EVEN_INEX (x == 1) - which means we get a wrong rounded result if x==1, - i.e. inexact= MPFR_EVEN_INEX */ - if (MPFR_UNLIKELY (inexact == MPFR_EVEN_INEX * MPFR_INT_SIGN (a))) - { - if (MPFR_UNLIKELY (MPFR_EXP (a) > __gmpfr_emax)) - mpfr_setmax (a, __gmpfr_emax); - else - mpfr_nexttozero (a); - inexact = -MPFR_INT_SIGN (a); - } - else if (MPFR_UNLIKELY (MPFR_EXP (a) > __gmpfr_emax)) - inexact = mpfr_overflow (a, rnd_mode, MPFR_SIGN (a)); - MPFR_RET (inexact); + which means we get a wrong rounded result if x == 1, + i.e. inexact == MPFR_EVEN_INEX (for positive numbers). */ + if (MPFR_LIKELY (inexact != MPFR_EVEN_INEX * MPFR_INT_SIGN (a))) + goto check_overflow; } + /* We need to take the value preceding |a|. We can't use + mpfr_nexttozero due to a possible out-of-range exponent. + But this will allow us to have more specific code. */ + MPFR_LOG_MSG (("correcting the value of a\n", 0)); + sh = (mpfr_prec_t) an * GMP_NUMB_BITS - aq; + mpn_sub_1 (ap, ap, an, MPFR_LIMB_ONE << sh); + if (MPFR_UNLIKELY (MPFR_LIMB_MSB (ap[an-1]) == 0)) + { + MPFR_EXP (a) --; + /* The following is valid whether an = 1 or an > 1. */ + ap[an-1] |= MPFR_LIMB_HIGHBIT; + } + inexact = - MPFR_INT_SIGN (a); + check_overflow: + if (MPFR_UNLIKELY (MPFR_EXP (a) > __gmpfr_emax)) + return mpfr_overflow (a, rnd_mode, MPFR_SIGN (a)); + else + MPFR_RET (inexact); } /* reserve a space to store b aligned with the result, i.e. shifted by @@ -631,16 +655,21 @@ mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) mpfr_exp_t exp_a; cancel -= add_exp; /* OK: add_exp is an int equal to 0 or 1 */ - exp_a = MPFR_GET_EXP (b) - cancel; - if (MPFR_UNLIKELY(exp_a < __gmpfr_emin)) + exp_a = exp_b - cancel; + if (MPFR_UNLIKELY (exp_a < __gmpfr_emin)) { - MPFR_TMP_FREE(marker); + MPFR_TMP_FREE (marker); if (rnd_mode == MPFR_RNDN && (exp_a < __gmpfr_emin - 1 || (inexact >= 0 && mpfr_powerof2_raw (a)))) rnd_mode = MPFR_RNDZ; return mpfr_underflow (a, rnd_mode, MPFR_SIGN(a)); } + if (MPFR_UNLIKELY (exp_a > __gmpfr_emax)) + { + MPFR_TMP_FREE (marker); + return mpfr_overflow (a, rnd_mode, MPFR_SIGN (a)); + } MPFR_SET_EXP (a, exp_a); } else /* cancel = 0: MPFR_EXP(a) <- MPFR_EXP(b) + add_exp */ @@ -648,13 +677,11 @@ mpfr_sub1 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mpfr_rnd_t rnd_mode) /* in case cancel = 0, add_exp can still be 1, in case b is just below a power of two, c is very small, prec(a) < prec(b), and rnd=away or nearest */ - mpfr_exp_t exp_b; - - exp_b = MPFR_GET_EXP (b); - if (MPFR_UNLIKELY(add_exp && exp_b == __gmpfr_emax)) + MPFR_ASSERTD (add_exp == 0 || add_exp == 1); + if (MPFR_UNLIKELY (add_exp && exp_b >= __gmpfr_emax)) { - MPFR_TMP_FREE(marker); - return mpfr_overflow (a, rnd_mode, MPFR_SIGN(a)); + MPFR_TMP_FREE (marker); + return mpfr_overflow (a, rnd_mode, MPFR_SIGN (a)); } MPFR_SET_EXP (a, exp_b + add_exp); } @@ -23,16 +23,15 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., #define MPFR_NEED_LONGLONG_H #include "mpfr-impl.h" -/* FIXME: In the case where one or several input pointers point to the - output variable, we need to store the significand in a new temporary - area (as usual), because these inputs may still need to be read for - the possible TMD resolution. Alternatively, since this is not - necessarily a rare case (doing s += sum(x[i],0<=i<n) should not be - regarded as uncommon), it may be better to optimize it by allocating - a bit more for the second sum_raw invocation and delaying the copy of - the significand when this occurs. Add a testcase to "tsum.c". - Remove the sentences about overlapping from doc/mpfr.texi once this is - fixed. */ +/* FIXME/TODO: Support reused arguments. This should now be done, but + has not been tested yet. Some things remain to do: + - Check that there are no regressions in timings. + - Add test cases to "tsum.c" (make sure that at least one fails with + the previous code, which implies that an input argument used as the + output is read to resolve the TMD). + - Remove the sentences about overlapping from doc/mpfr.texi once the + support for reused arguments has been confirmed. +*/ /* See the doc/sum.txt file for the algorithm and a part of its proof (this will later go into algorithms.tex). @@ -66,8 +65,8 @@ VL: This is very different: int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 }; #endif -/* Update minexp after detecting a potential integer overflow in extreme - cases (only a 32-bit ABI may be concerned in practice). +/* Update minexp (V) after detecting a potential integer overflow in + extreme cases (only a 32-bit ABI may be concerned in practice). Instead of an assertion failure below, we could 1. check that the ulp of each regular input has an exponent >= MPFR_EXP_MIN (with an assertion failure if this is not the case); @@ -78,12 +77,12 @@ int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 }; easily testable due to these huge precisions. Moreover, switching to a 64-bit ABI would be a better solution for such computations. So, let's leave this unimplemented. */ -#define UPDATE_MINEXP(E,SH) \ +#define SAFE_SUB(V,E,SH) \ do \ { \ mpfr_prec_t sh = (SH); \ MPFR_ASSERTN ((E) >= MPFR_EXP_MIN + sh); \ - minexp = (E) - sh; \ + V = (E) - sh; \ } \ while (0) @@ -108,8 +107,8 @@ int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 }; * logn: ceil(log2(rn)), where rn is the number of regular inputs. * prec: lower bound for e - err (as described above). * ep: pointer to mpfr_exp_t (see below), or a null pointer. - * minexpp: pointer to mpfr_exp_t (see below). - * maxexpp: pointer to mpfr_exp_t (see below). + * minexpp: pointer to mpfr_exp_t (see below), or a null pointer. + * maxexpp: pointer to mpfr_exp_t (see below), or a null pointer. * * Preconditions: * prec >= 1 @@ -119,8 +118,9 @@ int __gmpfr_cov_sum_tmd[MPFR_RND_MAX][2][2][3][2][2] = { 0 }; * the exact sum for this sum_raw invocation is 0), otherwise the number * of cancelled bits (>= 1), defined as the number of identical bits on * the most significant part of the accumulator. In the latter case, it - * also returns the following data in variables passed by reference: - * - in ep: the exponent e of the computed result (unless ep is NULL); + * also returns the following data in variables passed by reference, if + * the pointers are not NULL: + * - in ep: the exponent e of the computed result; * - in minexpp: the last value of minexp; * - in maxexpp: the new value of maxexp (for the next iteration after * the first invocation of sum_raw in the main code). @@ -430,10 +430,20 @@ sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, mpfr_ptr *const x, MPFR_LOG_MSG (("(err=%" MPFR_EXP_FSPEC "d) <= (e=%" MPFR_EXP_FSPEC "d) - (prec=%Pd)\n", (mpfr_eexp_t) err, (mpfr_eexp_t) e, prec)); + /* To avoid tests or copies, we consider the only two cases + that will occur in sum_aux. */ + MPFR_ASSERTD ((ep != NULL && + minexpp != NULL && + maxexpp != NULL) || + (ep == NULL && + minexpp == NULL && + maxexpp == NULL)); if (ep != NULL) - *ep = e; - *minexpp = minexp; - *maxexpp = maxexp2; + { + *ep = e; + *minexpp = minexp; + *maxexpp = maxexp2; + } MPFR_LOG_MSG (("return with minexp=%" MPFR_EXP_FSPEC "d maxexp2=%" MPFR_EXP_FSPEC "d%s\n", (mpfr_eexp_t) minexp, (mpfr_eexp_t) maxexp2, @@ -476,7 +486,7 @@ sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, mpfr_ptr *const x, MPN_COPY_DECR (wp + shifts, wp, ws - shifts); MPN_ZERO (wp, shifts); /* Compute minexp = minexp - shiftq safely. */ - UPDATE_MINEXP (minexp, shiftq); + SAFE_SUB (minexp, minexp, shiftq); MPFR_ASSERTD (minexp < maxexp2); } } @@ -489,7 +499,7 @@ sum_raw (mp_limb_t *wp, mp_size_t ws, mpfr_prec_t wq, mpfr_ptr *const x, { MPFR_LOG_MSG (("accumulator = 0, reiterate\n", 0)); /* Compute minexp = maxexp2 - (wq - (logn + 1)) safely. */ - UPDATE_MINEXP (maxexp2, wq - (logn + 1)); + SAFE_SUB (minexp, maxexp2, wq - (logn + 1)); /* Note: the logn + 1 corresponds to cq in the main code. */ } } @@ -511,6 +521,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, mp_limb_t *wp; /* pointer to the accumulator */ mp_size_t ts; /* size of the temporary area, in limbs */ mp_size_t ws; /* size of the accumulator, in limbs */ + mp_size_t zs; /* size of the TMD accumulator, in limbs */ mpfr_prec_t wq; /* size of the accumulator, in bits */ int logn; /* ceil(log2(rn)) */ int cq; @@ -552,6 +563,9 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, wq = (mpfr_prec_t) ws * GMP_NUMB_BITS; MPFR_ASSERTD (wq - cq - sq >= 4); + /* TODO: timings, comparing with a larger zs. */ + zs = MPFR_PREC2LIMBS (wq - sq); + MPFR_LOG_MSG (("cq=%d sq=%Pd logn=%d wq=%Pd\n", cq, sq, logn, wq)); /* An input block will have up to wq - cq bits, and its shifted value @@ -560,16 +574,38 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, MPFR_TMP_MARK (marker); - tp = MPFR_TMP_LIMBS_ALLOC (ts + ws); + /* Note: If the TMD does not occur, which should be the case for most + sums, allocating zs limbs is not necessary. However, we choose to + do this now (thus in all cases) because zs is very small, so that + the difference on the memory footprint will not be noticeable. + More precisely, zs is at most 2 in practice with the current code; + we may want to increase it in order to avoid performance issues in + some unlikely corner cases, but even in this case, it will remain + small. + One will have: + [------ ts ------][------ ws ------][- zs -] + The following would probably be better: + [------ ts ------] [------ ws ------] + [- zs -] + i.e. where the TMD accumulator (partially or completely) takes + some unneeded part of the temporary area in order to improve + data locality. But + * in low precision, data locality is regarded as ensured even + with the actual choice; + * in high precision, data locality for TMD resolution may not + be that important. + */ + tp = MPFR_TMP_LIMBS_ALLOC (ts + ws + zs); wp = tp + ts; MPN_ZERO (wp, ws); /* zero the accumulator */ { - mpfr_exp_t minexp; /* exponent of the LSB of the block */ + mpfr_exp_t minexp; /* exponent of the LSB of the block for sum_raw */ mpfr_prec_t cancel; /* number of cancelled bits */ mpfr_exp_t e; /* temporary exponent of the result */ mpfr_exp_t u; /* temporary exponent of the ulp (quantum) */ + mp_limb_t lbit; /* last bit (useful if even rounding) */ mp_limb_t rbit; /* rounding bit (corrected in halfway case) */ int corr; /* correction term (from -1 to 2) */ int sd, sh; /* shift counts */ @@ -582,7 +618,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, MPFR_LOG_MSG (("Compute an approximation with sum_raw...\n", 0)); /* Compute minexp = maxexp - (wq - cq) safely. */ - UPDATE_MINEXP (maxexp, wq - cq); + SAFE_SUB (minexp, maxexp, wq - cq); MPFR_ASSERTD (wq >= logn + sq + 5); cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts, logn, sq + 3, &e, &minexp, &maxexp); @@ -635,7 +671,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, if (MPFR_LIKELY (u > minexp)) { mpfr_prec_t tq; - mp_size_t ei, fi, wi; + mp_size_t wi; int td; tq = u - minexp; @@ -644,24 +680,9 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, wi = tq / GMP_NUMB_BITS; - if (MPFR_LIKELY (sh != 0)) - { - ei = (e - minexp) / GMP_NUMB_BITS; - fi = ei - (sn - 1); - MPFR_ASSERTD (fi == wi || fi == wi + 1); - mpn_lshift (sump, wp + fi, sn, sh); - if (fi != wi) - sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh); - } - else - { - MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS - == cancel); - MPN_COPY (sump, wp + wi, sn); - } - /* Determine the rounding bit, which is represented. */ td = tq % GMP_NUMB_BITS; + lbit = (wp[wi] >> td) & 1; rbit = td >= 1 ? ((wp[wi] >> (td - 1)) & MPFR_LIMB_ONE) : (MPFR_ASSERTD (wi >= 1), wp[wi-1] >> (GMP_NUMB_BITS - 1)); MPFR_ASSERTD (rbit == 0 || rbit == 1); @@ -674,8 +695,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, inex = rounding bit || sticky bit. In round to nearest, also determine the rounding direction: obtained from the rounding bit possibly except in halfway cases. */ - if (MPFR_LIKELY (rbit == 0 || - (rnd == MPFR_RNDN && ((wp[wi] >> td) & 1) == 0))) + if (MPFR_LIKELY (rbit == 0 || (rnd == MPFR_RNDN && lbit == 0))) { /* We need to determine the sticky bit, either to set inex (if the rounding bit is 0) or to possibly "correct" rbit @@ -691,10 +711,8 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, if (!inex) { - mp_size_t wj = wi; - - while (!inex && wj > 0) - inex = wp[--wj] != 0; + while (!inex && wi > 0) + inex = wp[--wi] != 0; if (!inex && rbit != 0) { /* sticky bit = 0, rounding bit = 1, @@ -810,32 +828,24 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, } else /* u <= minexp */ { - mp_size_t en; - - en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS; - if (MPFR_LIKELY (sh != 0)) - mpn_lshift (sump + sn - en, wp, en, sh); - else if (MPFR_UNLIKELY (en > 0)) - MPN_COPY (sump + sn - en, wp, en); - if (sn > en) - MPN_ZERO (sump, sn - en); - - /* The exact value of the accumulator has been copied. + /* The exact value of the accumulator will be copied. * The TMD occurs if and only if there are bits still * not taken into account, and if it occurs, this is * necessarily on a machine number (-> tmd = 1). */ + lbit = u == minexp ? wp[0] & 1 : 0; rbit = 0; inex = tmd = maxexp != MPFR_EXP_MIN; } - /* Leading bit: 1 if positive, 0 if negative. */ - pos = sump[sn-1] >> (GMP_NUMB_BITS - 1); + /* pos = 1 if positive, 0 if negative. */ + /* TODO: change to neg? */ + pos = ! (wp[ws-1] >> (GMP_NUMB_BITS - 1)); MPFR_ASSERTD (rbit == 0 || rbit == 1); - MPFR_LOG_MSG (("tmd=%d rbit=%d inex=%d pos=%d\n", - tmd, (int) rbit, inex, pos)); + MPFR_LOG_MSG (("tmd=%d lbit=%d rbit=%d inex=%d pos=%d\n", + tmd, (int) lbit, (int) rbit, inex, pos)); /* Here, if the final sum is known to be exact, inex = 0, otherwise * inex = 1. We have a truncated significand, a trailing term t such @@ -882,26 +892,34 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, } else { + mpfr_exp_t minexp2; + mpfr_prec_t cancel2; mpfr_exp_t err; /* exponent of the error bound */ - mp_size_t zs; - int sst; /* sign of the secondary term */ + mp_size_t zz; /* number of limbs to zero in the TMD accumulator */ + mp_limb_t *zp; /* pointer to the TMD accumulator */ + mpfr_prec_t zq; /* size of the TMD accumulator, in bits */ + int sst; /* sign of the secondary term */ + + /* TMD case. Here we use a new variable minexp2, with the same + meaning as minexp, as we want to keep the minexp value for + the copy to the destination. */ MPFR_ASSERTD (maxexp > MPFR_EXP_MIN); MPFR_ASSERTD (tmd == 1 || tmd == 2); - /* New accumulator size */ - ws = MPFR_PREC2LIMBS (wq - sq); - wq = (mpfr_prec_t) ws * GMP_NUMB_BITS; + /* TMD accumulator */ + zp = wp + ws; + zq = (mpfr_prec_t) zs * GMP_NUMB_BITS; err = maxexp + logn; MPFR_LOG_MSG (("TMD with" " maxexp=%" MPFR_EXP_FSPEC "d" " err=%" MPFR_EXP_FSPEC "d" - " ws=%Pd" - " wq=%Pd\n", + " zs=%Pd" + " zq=%Pd\n", (mpfr_eexp_t) maxexp, (mpfr_eexp_t) err, - (mpfr_prec_t) ws, wq)); + (mpfr_prec_t) zs, zq)); /* The d-1 bits from u-2 to u-d (= err) are identical. */ @@ -924,22 +942,22 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, { wi++; /* number of words with represented bits */ td = GMP_NUMB_BITS - td; - zs = ws - wi; - MPFR_ASSERTD (zs >= 0 && zs < ws); - mpn_lshift (wp + zs, wp, wi, td); + zz = zs - wi; + MPFR_ASSERTD (zz >= 0 && zz < zs); + mpn_lshift (zp + zz, wp, wi, td); } else { MPFR_ASSERTD (wi > 0); - zs = ws - wi; - MPFR_ASSERTD (zs >= 0 && zs < ws); - if (zs > 0) - MPN_COPY_DECR (wp + zs, wp, wi); + zz = zs - wi; + MPFR_ASSERTD (zz >= 0 && zz < zs); + if (zz > 0) + MPN_COPY_DECR (zp + zz, wp, wi); } - /* Compute minexp = minexp - (zs * GMP_NUMB_BITS + td) safely. */ - UPDATE_MINEXP (minexp, zs * GMP_NUMB_BITS + td); - MPFR_ASSERTD (minexp == err + 2 - wq); + /* Compute minexp2 = minexp - (zs * GMP_NUMB_BITS + td) safely. */ + SAFE_SUB (minexp2, minexp, zz * GMP_NUMB_BITS + td); + MPFR_ASSERTD (minexp2 == err + 2 - zq); } else /* err < minexp */ { @@ -949,25 +967,25 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, from maxexp, with cq bits reserved to avoid an overflow (as in the early steps). */ MPFR_LOG_MSG (("[TMD] err < minexp\n", 0)); - zs = ws; + zz = zs; - /* Compute minexp = maxexp - (wq - cq) safely. */ - UPDATE_MINEXP (maxexp, wq - cq); - MPFR_ASSERTD (minexp == err + 1 - wq); + /* Compute minexp2 = maxexp - (zq - cq) safely. */ + SAFE_SUB (minexp2, maxexp, zq - cq); + MPFR_ASSERTD (minexp2 == err + 1 - zq); } - MPN_ZERO (wp, zs); + MPN_ZERO (zp, zz); /* We need to determine the sign sst of the secondary term. In sum_raw, since the truncated sum corresponding to this secondary term will be in [2^(e-1),2^e] and the error strictly less than 2^err, we can stop the iterations when e - err >= 1 (this bound is the 11th argument of sum_raw). */ - cancel = sum_raw (wp, ws, wq, x, n, minexp, maxexp, tp, ts, - logn, 1, NULL, &minexp, &maxexp); + cancel2 = sum_raw (zp, zs, zq, x, n, minexp2, maxexp, tp, ts, + logn, 1, NULL, NULL, NULL); - if (cancel != 0) - sst = MPFR_LIMB_MSB (wp[ws-1]) == 0 ? 1 : -1; + if (cancel2 != 0) + sst = MPFR_LIMB_MSB (zp[zs-1]) == 0 ? 1 : -1; else if (tmd == 1) sst = 0; else @@ -977,7 +995,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, which depends on the last bit of the pre-rounded result. */ MPFR_ASSERTD (rnd == MPFR_RNDN && tmd == 2); - sst = (sump[0] & (MPFR_LIMB_ONE << sd)) ? 1 : -1; + sst = lbit != 0 ? 1 : -1; } MPFR_LOG_MSG (("[TMD] tmd=%d rbit=%d sst=%d\n", @@ -985,7 +1003,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, /* Do not consider the corrected sst for MPFR_COV_SET */ MPFR_COV_SET (sum_tmd[(int) rnd][tmd-1][rbit] - [cancel == 0 ? 1 : sst+1][pos][sq > MPFR_PREC_MIN]); + [cancel2 == 0 ? 1 : sst+1][pos][sq > MPFR_PREC_MIN]); inex = MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) ? (sst ? -1 : 0) : @@ -993,7 +1011,7 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, (MPFR_ASSERTD (rnd == MPFR_RNDN), tmd == 1 ? - sst : sst); - if (tmd == 2 && sst == (rbit ? -1 : 1)) + if (tmd == 2 && sst == (rbit != 0 ? -1 : 1)) corr = 1 - (int) rbit; else if (MPFR_IS_LIKE_RNDD (rnd, pos ? 1 : -1) && sst == -1) corr = (int) rbit - 1; @@ -1013,6 +1031,42 @@ sum_aux (mpfr_ptr sum, mpfr_ptr *const x, unsigned long n, mpfr_rnd_t rnd, MPFR_SIGN (sum) = pos ? MPFR_SIGN_POS : MPFR_SIGN_NEG; + if (MPFR_LIKELY (u > minexp)) + { + mp_size_t wi; + + /* Recompute the initial value of wi. */ + wi = (u - minexp) / GMP_NUMB_BITS; + if (MPFR_LIKELY (sh != 0)) + { + mp_size_t fi; + + fi = (e - minexp) / GMP_NUMB_BITS - (sn - 1); + MPFR_ASSERTD (fi == wi || fi == wi + 1); + mpn_lshift (sump, wp + fi, sn, sh); + if (fi != wi) + sump[0] |= wp[wi] >> (GMP_NUMB_BITS - sh); + } + else + { + MPFR_ASSERTD ((mpfr_prec_t) (ws - (wi + sn)) * GMP_NUMB_BITS + == cancel); + MPN_COPY (sump, wp + wi, sn); + } + } + else /* u <= minexp */ + { + mp_size_t en; + + en = (e - minexp + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS; + if (MPFR_LIKELY (sh != 0)) + mpn_lshift (sump + sn - en, wp, en, sh); + else if (MPFR_UNLIKELY (en > 0)) + MPN_COPY (sump + sn - en, wp, en); + if (sn > en) + MPN_ZERO (sump, sn - en); + } + if (MPFR_UNLIKELY (sq == 1)) /* precision 1 */ { sump[0] = MPFR_LIMB_HIGHBIT; diff --git a/src/ubf.c b/src/ubf.c new file mode 100644 index 000000000..e43e0e41d --- /dev/null +++ b/src/ubf.c @@ -0,0 +1,226 @@ +/* Functions to work with unbounded floats (limited low-level interface). + +Copyright 2016 Free Software Foundation, Inc. +Contributed by the AriC and Caramba projects, INRIA. + +This file is part of the GNU MPFR Library. + +The GNU MPFR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MPFR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see +http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ + +#include "mpfr-impl.h" + +/* Note: In MPFR math functions, even if UBF code is not called first, + we may still need to handle special values NaN and infinities here. + Indeed, for MAXR * MAXR + (-inf), even though this is a special case, + the computation will also generate an overflow due to MAXR * MAXR, + so that UBF code will be called anyway (except if special cases are + detected and handled separately, but for polynomial, this should not + be needed). */ + +/* This function does not change the flags. */ +static void +mpfr_get_zexp (mpz_ptr ez, mpfr_srcptr x) +{ + mpz_init (ez); + + if (MPFR_IS_UBF (x)) + mpz_set (ez, MPFR_ZEXP (x)); + else + { + mp_limb_t e_limb[MPFR_EXP_LIMB_SIZE]; + mpfr_t e; + int inex; + MPFR_SAVE_EXPO_DECL (expo); + + /* TODO: Once this has been tested, optimize based on whether + _MPFR_EXP_FORMAT <= 3. */ + MPFR_TMP_INIT1 (e_limb, e, sizeof (mpfr_exp_t) * CHAR_BIT); + MPFR_SAVE_EXPO_MARK (expo); + MPFR_DBGRES (inex = mpfr_set_exp_t (e, MPFR_GET_EXP (x), MPFR_RNDN)); + MPFR_ASSERTD (inex == 0); + MPFR_DBGRES (inex = mpfr_get_z (ez, e, MPFR_RNDN)); + MPFR_ASSERTD (inex == 0); + MPFR_SAVE_EXPO_FREE (expo); + } +} + +/* Exact product. The number a is assumed to have enough allocated memory, + where the trailing bits are regarded as being part of the input numbers + (no reallocation is attempted and no check is performed as MPFR_TMP_INIT + could have been used). The arguments b and c may actually be UBF numbers + (mpfr_srcptr can be seen a bit like void *, but is stronger). + This function does not change the flags, except in case of NaN. */ +void +mpfr_ubf_mul_exact (mpfr_ubf_ptr a, mpfr_srcptr b, mpfr_srcptr c) +{ + MPFR_LOG_FUNC + (("b[%Pu]=%.*Rg c[%Pu]=%.*Rg", + mpfr_get_prec (b), mpfr_log_prec, b, + mpfr_get_prec (c), mpfr_log_prec, c), + ("a[%Pu]=%.*Rg", + mpfr_get_prec (a), mpfr_log_prec, a)); + + MPFR_ASSERTD ((mpfr_ptr) a != b); + MPFR_ASSERTD ((mpfr_ptr) a != c); + MPFR_SIGN (a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c)); + + if (MPFR_ARE_SINGULAR (b, c)) + { + if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c)) + MPFR_SET_NAN (a); + else if (MPFR_IS_INF (b)) + { + if (MPFR_NOTZERO (c)) + MPFR_SET_INF (a); + else + MPFR_SET_NAN (a); + } + else if (MPFR_IS_INF (c)) + { + if (!MPFR_IS_ZERO (b)) + MPFR_SET_INF (a); + else + MPFR_SET_NAN (a); + } + else + { + MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c)); + MPFR_SET_ZERO (a); + } + } + else + { + mpfr_exp_t e; + mp_size_t bn, cn; + mpfr_limb_ptr ap; + mp_limb_t u, v; + int m; + + /* Note about the code below: For the choice of the precision of + * the result a, one could choose PREC(b) + PREC(c), instead of + * taking whole limbs into account, but in most cases where one + * would gain one limb, one would need to copy the significand + * instead of a no-op (see the mul.c code). + * But in the case MPFR_LIMB_MSB (u) == 0, if the result fits in + * an-1 limbs, one could actually do + * mpn_rshift (ap, ap, k, GMP_NUMB_BITS - 1) + * instead of + * mpn_lshift (ap, ap, k, 1) + * to gain one limb (and reduce the precision), replacing a shift + * by another one. Would this be interesting? + */ + + bn = MPFR_LIMB_SIZE (b); + cn = MPFR_LIMB_SIZE (c); + + ap = MPFR_MANT (a); + + u = (bn >= cn) ? + mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) : + mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn); + if (MPFR_UNLIKELY (MPFR_LIMB_MSB (u) == 0)) + { + m = 1; + MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1)); + MPFR_ASSERTD (v == 0); + } + else + m = 0; + + if (! MPFR_IS_UBF (b) && ! MPFR_IS_UBF (c) && + (e = MPFR_GET_EXP (b) + MPFR_GET_EXP (c) - m, + MPFR_EXP_IN_RANGE (e))) + { + MPFR_SET_EXP (a, e); + } + else + { + mpz_t be, ce; + + mpz_init (MPFR_ZEXP (a)); + + /* This may involve copies of mpz_t, but exponents should not be + very large integers anyway. */ + mpfr_get_zexp (be, b); + mpfr_get_zexp (ce, c); + mpz_add (MPFR_ZEXP (a), be, ce); + mpz_clear (be); + mpz_clear (ce); + mpz_sub_ui (MPFR_ZEXP (a), MPFR_ZEXP (a), m); + MPFR_SET_UBF (a); + } + } +} + +int +mpfr_ubf_exp_less_p (mpfr_srcptr x, mpfr_srcptr y) +{ + mpz_t xe, ye; + int c; + + mpfr_get_zexp (xe, x); + mpfr_get_zexp (ye, y); + c = mpz_cmp (xe, ye) < 0; + mpz_clear (xe); + mpz_clear (ye); + return c; +} + +/* Convert an mpz_t to an mpfr_exp_t, restricted to + the interval [MPFR_EXP_MIN,MPFR_EXP_MAX]. */ +mpfr_exp_t +mpfr_ubf_zexp2exp (mpz_ptr ez) +{ + mp_size_t n; + mpfr_eexp_t e; + mpfr_t d; + int inex; + MPFR_SAVE_EXPO_DECL (expo); + + n = ABSIZ (ez); /* limb size of ez */ + if (n == 0) + return 0; + + MPFR_SAVE_EXPO_MARK (expo); + mpfr_init2 (d, n * GMP_NUMB_BITS); + MPFR_DBGRES (inex = mpfr_set_z (d, ez, MPFR_RNDN)); + MPFR_ASSERTD (inex == 0); + e = mpfr_get_exp_t (d, MPFR_RNDZ); + mpfr_clear (d); + MPFR_SAVE_EXPO_FREE (expo); + if (MPFR_UNLIKELY (e < MPFR_EXP_MIN)) + return MPFR_EXP_MIN; + if (MPFR_UNLIKELY (e > MPFR_EXP_MAX)) + return MPFR_EXP_MAX; + return e; +} + +/* Return the difference of the exponents of x and y, restricted to + the interval [MPFR_EXP_MIN,MPFR_EXP_MAX]. */ +mpfr_exp_t +mpfr_ubf_diff_exp (mpfr_srcptr x, mpfr_srcptr y) +{ + mpz_t xe, ye; + mpfr_exp_t e; + + mpfr_get_zexp (xe, x); + mpfr_get_zexp (ye, y); + mpz_sub (xe, xe, ye); + mpz_clear (ye); + e = mpfr_ubf_zexp2exp (xe); + mpz_clear (xe); + return e; +} diff --git a/src/uceil_log2.c b/src/uceil_log2.c index bfb799cae..7ee5166ae 100644 --- a/src/uceil_log2.c +++ b/src/uceil_log2.c @@ -33,7 +33,9 @@ __gmpfr_ceil_log2 (double d) union mpfr_ieee_double_extract x; x.d = d; - exp = x.s.exp - 1023; + /* The cast below is useless in theory, but let us not depend on the + integer promotion rules (for instance, tcc is currently wrong). */ + exp = (long) x.s.exp - 1023; x.s.exp = 1023; /* value for 1 <= d < 2 */ if (x.d != 1.0) /* d: not a power of two? */ exp++; @@ -42,19 +44,19 @@ __gmpfr_ceil_log2 (double d) double m; if (d < 0.0) - return __gmpfr_floor_log2(-d)+1; + return __gmpfr_floor_log2 (-d) + 1; else if (d == 0.0) return -1023; else if (d >= 1.0) { exp = 0; - for( m= 1.0 ; m < d ; m *=2.0 ) + for (m = 1.0; m < d; m *= 2.0) exp++; } else { exp = 1; - for( m= 1.0 ; m >= d ; m *= (1.0/2.0) ) + for (m = 1.0; m >= d; m *= 0.5) exp--; } #endif /* _MPFR_IEEE_FLOATS */ diff --git a/src/vasprintf.c b/src/vasprintf.c index 634b5b189..3e5fb0672 100644 --- a/src/vasprintf.c +++ b/src/vasprintf.c @@ -1552,6 +1552,24 @@ partition_number (struct number_parts *np, mpfr_srcptr p, } } } + else if (MPFR_UNLIKELY (MPFR_IS_UBF (p))) + { + /* mpfr_get_str does not support UBF, so that UBF numbers are regarded + as special cases here. This is not much a problem since UBF numbers + are internal to MPFR and here, they only for logging. */ + if (np->pad_type == LEADING_ZEROS) + /* change to right justification padding with left spaces */ + np->pad_type = LEFT; + + if (MPFR_IS_NEG (p)) + np->sign = '-'; + + np->ip_size = 3; + str = (char *) (*__gmp_allocate_func) (1 + np->ip_size); + strcpy (str, uppercase ? "UBF" : "ubf"); + np->ip_ptr = register_string (np->sl, str); + /* TODO: output more information (e.g. the exponent) if need be. */ + } else { MPFR_ASSERTD (MPFR_IS_PURE_FP (p)); |