/* mpfr_mul -- multiply two floating-point numbers

Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of the MPFR Library.

The MPFR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The MPFR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPFR Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301, USA. */

#define MPFR_NEED_LONGLONG_H
#include "mpfr-impl.h"


/********* BEGINNING CHECK *************/

/* Check if we have to check the result of mpfr_mul.
   TODO: Find a better (and faster?) check than using the old implementation */
#ifdef WANT_ASSERT
# if WANT_ASSERT >= 3

int mpfr_mul2 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode);

static int
mpfr_mul3 (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  /* Old implementation */
  int sign_product, cc, inexact;
  mp_exp_t ax;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mp_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL(marker);

  /* deal with special cases */
  if (MPFR_ARE_SINGULAR(b,c))
    {
      if (MPFR_IS_NAN(b) || MPFR_IS_NAN(c))
        {
          MPFR_SET_NAN(a);
          MPFR_RET_NAN;
        }
      sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
      if (MPFR_IS_INF(b))
        {
          if (MPFR_IS_INF(c) || MPFR_NOTZERO(c))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF(c))
        {
          if (MPFR_NOTZERO(b))
            {
              MPFR_SET_SIGN(a, sign_product);
              MPFR_SET_INF(a);
              MPFR_RET(0); /* exact */
            }
          else
            {
              MPFR_SET_NAN(a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN(a, sign_product);
          MPFR_SET_ZERO(a);
          MPFR_RET(0); /* 0 * 0 is exact */
        }
    }
  MPFR_CLEAR_FLAGS(a);
  sign_product = MPFR_MULT_SIGN( MPFR_SIGN(b) , MPFR_SIGN(c) );
  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);

  bq = MPFR_PREC(b);
  cq = MPFR_PREC(c);

  MPFR_ASSERTD(bq+cq > bq); /* PREC_MAX is at most half the type's maximum,
                               so bq+cq cannot overflow */

  bn = (bq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of b */
  cn = (cq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of c */
  k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
  /* tn <= k, thus no integer overflow */
  MPFR_ASSERTD(tn <= k);

  /* Check for no size_t overflow */
  MPFR_ASSERTD((size_t) k <= ((size_t) ~0) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK(marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC((size_t) k * BYTES_PER_MP_LIMB);
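  /* Added note: mpn_mul (rp, up, un, vp, vn), which requires un >= vn
     (hence the argument ordering below), writes the full (un+vn)-limb
     product to rp and returns its most significant limb.  Since both
     significands are normalized (top bit of the top limb set), that limb
     is at least 2^(BITS_PER_MP_LIMB-2).  For instance, with 64-bit limbs,
     0x8000000000000000 * 0x8000000000000000 has high limb
     0x4000000000000000 (msb clear: a one-bit normalizing shift will be
     needed), whereas 0xC000000000000000 * 0xC000000000000000 has high
     limb 0x9000000000000000 (msb set: no shift).  The msb is extracted
     into b1 below. */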
  /* multiply the two mantissas in the temporary allocated space */
  b1 = (MPFR_LIKELY(bn >= cn))
    ? mpn_mul (tmp, MPFR_MANT(b), bn, MPFR_MANT(c), cn)
    : mpn_mul (tmp, MPFR_MANT(c), cn, MPFR_MANT(b), bn);

  /* now tmp[0]..tmp[k-1] contains the product of both mantissas, with
     tmp[k-1] >= 2^(BITS_PER_MP_LIMB-2) */
  b1 >>= BITS_PER_MP_LIMB - 1; /* msb from the product */

  /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
     then their product is in ]1/4, 1/2] with probability 2*ln(2)-1 ~ 0.386
     and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
  tmp += k - tn;
  if (MPFR_UNLIKELY(b1 == 0))
    mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */

  cc = mpfr_round_raw (MPFR_MANT (a), tmp, bq + cq,
                       MPFR_IS_NEG_SIGN(sign_product),
                       MPFR_PREC (a), rnd_mode, &inexact);

  /* cc = 1 ==> result is a power of two */
  if (MPFR_UNLIKELY(cc))
    MPFR_MANT(a)[MPFR_LIMB_SIZE(a)-1] = MPFR_LIMB_HIGHBIT;

  MPFR_TMP_FREE(marker);

  {
    mp_exp_t ax2 = ax + (mp_exp_t) (b1 - 1 + cc);
    if (MPFR_UNLIKELY(ax2 > __gmpfr_emax))
      return mpfr_overflow (a, rnd_mode, sign_product);
    if (MPFR_UNLIKELY(ax2 < __gmpfr_emin))
      {
        /* In round-to-nearest mode, if the exponent of the exact result
           (i.e. before rounding, i.e. without taking cc into account) is
           < __gmpfr_emin - 1, or if the exact result is a power of 2
           (i.e. if both arguments are powers of 2), then round to zero. */
        if (rnd_mode == GMP_RNDN
            && (ax + (mp_exp_t) b1 < __gmpfr_emin
                || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
          rnd_mode = GMP_RNDZ;
        return mpfr_underflow (a, rnd_mode, sign_product);
      }
    MPFR_SET_EXP (a, ax2);
    MPFR_SET_SIGN(a, sign_product);
  }
  return inexact;
}

int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  mpfr_t ta, tb, tc;
  int inexact1, inexact2;

  mpfr_init2 (ta, MPFR_PREC (a));
  mpfr_init2 (tb, MPFR_PREC (b));
  mpfr_init2 (tc, MPFR_PREC (c));
  MPFR_ASSERTN (mpfr_set (tb, b, GMP_RNDN) == 0);
  MPFR_ASSERTN (mpfr_set (tc, c, GMP_RNDN) == 0);

  inexact2 = mpfr_mul3 (ta, tb, tc, rnd_mode);
  inexact1 = mpfr_mul2 (a, b, c, rnd_mode);
  if (mpfr_cmp (ta, a)
      || inexact1*inexact2 < 0
      || (inexact1*inexact2 == 0 && (inexact1|inexact2) != 0))
    {
      printf ("mpfr_mul returns different values for %s\n"
              "Prec_a= %lu Prec_b= %lu Prec_c= %lu\nB=",
              mpfr_print_rnd_mode (rnd_mode),
              MPFR_PREC (a), MPFR_PREC (b), MPFR_PREC (c));
      mpfr_out_str (stdout, 16, 0, tb, GMP_RNDN);
      printf ("\nC=");
      mpfr_out_str (stdout, 16, 0, tc, GMP_RNDN);
      printf ("\nOldMul: ");
      mpfr_out_str (stdout, 16, 0, ta, GMP_RNDN);
      printf ("\nNewMul: ");
      mpfr_out_str (stdout, 16, 0, a, GMP_RNDN);
      printf ("\nNewInexact = %d | OldInexact = %d\n",
              inexact1, inexact2);
      MPFR_ASSERTN(0);
    }

  mpfr_clears (ta, tb, tc, NULL);
  return inexact1;
}

# define mpfr_mul mpfr_mul2
# endif
#endif

/****** END OF CHECK *******/
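/* Added summary of the strategy implemented below:
   1. Handle the singular operands (NaN, Inf, zero) first.
   2. If the larger operand fits in at most two limbs, multiply directly
      with umul_ppmm/add_ssaaaa.
   3. If it exceeds MPFR_MUL_THRESHOLD limbs, try mpfr_mulhigh_n, which
      approximates only the high part of the product; if mpfr_round_p
      cannot guarantee correct rounding from that approximation, fall back
      to a full product.
   4. Otherwise compute the full product with mpn_mul.
   In every case the product of the significands is then normalized,
   rounded to MPFR_PREC(a) bits, and checked against the exponent range. */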
/* Multiply 2 mpfr_t */
int
mpfr_mul (mpfr_ptr a, mpfr_srcptr b, mpfr_srcptr c, mp_rnd_t rnd_mode)
{
  int sign, inexact;
  mp_exp_t ax, ax2;
  mp_limb_t *tmp;
  mp_limb_t b1;
  mp_prec_t bq, cq;
  mp_size_t bn, cn, tn, k;
  MPFR_TMP_DECL (marker);

  MPFR_LOG_FUNC (("b[%#R]=%R c[%#R]=%R rnd=%d", b, b, c, c, rnd_mode),
                 ("a[%#R]=%R inexact=%d", a, a, inexact));

  /* deal with special cases */
  if (MPFR_ARE_SINGULAR (b, c))
    {
      if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c))
        {
          MPFR_SET_NAN (a);
          MPFR_RET_NAN;
        }
      sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));
      if (MPFR_IS_INF (b))
        {
          if (!MPFR_IS_ZERO (c))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else if (MPFR_IS_INF (c))
        {
          if (!MPFR_IS_ZERO (b))
            {
              MPFR_SET_SIGN (a, sign);
              MPFR_SET_INF (a);
              MPFR_RET (0);
            }
          else
            {
              MPFR_SET_NAN (a);
              MPFR_RET_NAN;
            }
        }
      else
        {
          MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c));
          MPFR_SET_SIGN (a, sign);
          MPFR_SET_ZERO (a);
          MPFR_RET (0);
        }
    }
  MPFR_CLEAR_FLAGS (a);
  sign = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c));

  ax = MPFR_GET_EXP (b) + MPFR_GET_EXP (c);
  /* Note: the exponent of the exact result will be e = bx + cx + ec with
     ec in {-1,0,1} and the following assumes that e is representable. */

  /* FIXME: Useful since we do an exponent check afterwards?
   * It is useful iff the precision is large, there is an overflow, and we
   * are doing further multiplications... */
#ifdef HUGE
  if (MPFR_UNLIKELY (ax > __gmpfr_emax + 1))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax < __gmpfr_emin - 2))
    return mpfr_underflow (a, rnd_mode == GMP_RNDN ? GMP_RNDZ : rnd_mode,
                           sign);
#endif

  bq = MPFR_PREC (b);
  cq = MPFR_PREC (c);

  MPFR_ASSERTD (bq+cq > bq); /* PREC_MAX is at most half the type's maximum,
                                so bq+cq cannot overflow */

  bn = (bq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of b */
  cn = (cq+BITS_PER_MP_LIMB-1)/BITS_PER_MP_LIMB; /* number of limbs of c */
  k = bn + cn; /* effective number of limbs used by b*c (= tn or tn+1) below */
  tn = (bq + cq + BITS_PER_MP_LIMB - 1) / BITS_PER_MP_LIMB;
  MPFR_ASSERTD (tn <= k); /* tn <= k, thus no integer overflow */

  /* Check for no size_t overflow */
  MPFR_ASSERTD ((size_t) k <= ((size_t) ~0) / BYTES_PER_MP_LIMB);
  MPFR_TMP_MARK (marker);
  tmp = (mp_limb_t *) MPFR_TMP_ALLOC ((size_t) k * BYTES_PER_MP_LIMB);

  /* multiply the two mantissas in the temporary allocated space;
     first make sure that bn >= cn, as required by mpn_mul */
  if (MPFR_UNLIKELY (bn < cn))
    {
      mpfr_srcptr tmp = b;
      mp_size_t tn = bn;
      b = c;
      bn = cn;
      c = tmp;
      cn = tn;
    }
  MPFR_ASSERTD (bn >= cn);

  /* longlong.h's umul_ppmm seems to be buggy on HP-UX. */
#ifndef __hpux
  if (MPFR_LIKELY (bn <= 2))
    {
      if (bn == 1)
        {
          /* 1 limb * 1 limb */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          b1 = tmp[1];
        }
      else if (MPFR_UNLIKELY (cn == 1))
        {
          /* 2 limbs * 1 limb */
          mp_limb_t t;
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t);
          b1 = tmp[2];
        }
      else
        {
          /* 2 limbs * 2 limbs */
          mp_limb_t t1, t2, t3;
          /* First, 2 limbs * 1 limb */
          umul_ppmm (tmp[1], tmp[0], MPFR_MANT (b)[0], MPFR_MANT (c)[0]);
          umul_ppmm (tmp[2], t1, MPFR_MANT (b)[1], MPFR_MANT (c)[0]);
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], 0, t1);
          /* Second, the other 2 limbs * 1 limb product */
          umul_ppmm (t1, t2, MPFR_MANT (b)[0], MPFR_MANT (c)[1]);
          umul_ppmm (tmp[3], t3, MPFR_MANT (b)[1], MPFR_MANT (c)[1]);
          add_ssaaaa (tmp[3], t1, tmp[3], t1, 0, t3);
          /* Sum those two partial products */
          add_ssaaaa (tmp[2], tmp[1], tmp[2], tmp[1], t1, t2);
          tmp[3] += (tmp[2] < t1);
          b1 = tmp[3];
        }
      b1 >>= (BITS_PER_MP_LIMB - 1);
      tmp += k - tn;
      if (MPFR_UNLIKELY (b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }
  else
#endif
  if (MPFR_UNLIKELY (bn > MPFR_MUL_THRESHOLD))
    {
      mp_limb_t *bp, *cp;
      mp_size_t n;
      mp_prec_t p;

      /* Compute the estimated precision of mulhigh.  We could use
         `+ (n < cn) + (n < bn)' instead of `+ 2', but is it worth it? */
      n = MPFR_LIMB_SIZE (a) + 1;
      n = MIN (n, cn);
      MPFR_ASSERTD (n >= 1 && 2*n <= k && n <= cn && n <= bn);
      p = n*BITS_PER_MP_LIMB - MPFR_INT_CEIL_LOG2 (n + 2);
      bp = MPFR_MANT (b) + bn - n;
      cp = MPFR_MANT (c) + cn - n;
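      /* Added note on the bound above: mpfr_mulhigh_n computes only the
         high part of the product and neglects the low-order partial
         products.  Assuming the usual short-product error analysis, the
         neglected part amounts to less than n+2 units in the last place of
         the n-limb high part, i.e. about MPFR_INT_CEIL_LOG2 (n + 2)
         untrusted low bits; so only the leading
         p = n*BITS_PER_MP_LIMB - MPFR_INT_CEIL_LOG2 (n + 2)
         bits of the approximation are relied upon when mpfr_round_p
         decides roundability below. */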
      /* Check if MulHigh can produce a roundable result.
         We may lose 1 bit due to RNDN, and 1 due to the final shift. */
      if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5))
        {
          if (MPFR_UNLIKELY (MPFR_PREC (a) > p - 5 + BITS_PER_MP_LIMB
                             || bn <= MPFR_MUL_THRESHOLD+1))
            {
              /* MulHigh can't produce a roundable result. */
              MPFR_LOG_MSG (("mpfr_mulhigh can't be used (%lu VS %lu)\n",
                             MPFR_PREC (a), p));
              goto full_multiply;
            }
          /* Add one extra limb to the mantissas of b and c. */
          if (bn > n)
            bp --;
          else
            {
              bp = MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
              bp[0] = 0;
              MPN_COPY (bp+1, MPFR_MANT (b)+bn-n, n);
            }
          if (cn > n)
            cp --; /* FIXME: Could this happen? */
          else
            {
              cp = MPFR_TMP_ALLOC ((n+1) * sizeof (mp_limb_t));
              cp[0] = 0;
              MPN_COPY (cp+1, MPFR_MANT (c)+cn-n, n);
            }
          /* We will compute with one extra limb */
          n++;
          p = n*BITS_PER_MP_LIMB - MPFR_INT_CEIL_LOG2 (n + 2);
          /* Due to some nasty reasons, we can have only 4 bits of margin */
          MPFR_ASSERTD (MPFR_PREC (a) <= p - 4);

          if (MPFR_LIKELY (k < 2*n))
            {
              tmp = MPFR_TMP_ALLOC (2*n*sizeof (mp_limb_t));
              tmp += 2*n-k; /* `tmp' still points to an area of `k' limbs */
            }
        }
      MPFR_LOG_MSG (("Use mpfr_mulhigh (%lu VS %lu)\n", MPFR_PREC (a), p));
      /* Compute an approximation of the product of b and c */
      mpfr_mulhigh_n (tmp+k-2*n, bp, cp, n);
      /* now tmp[0]..tmp[k-1] contains the product of both mantissas, with
         tmp[k-1] >= 2^(BITS_PER_MP_LIMB-2) */
      b1 = tmp[k-1] >> (BITS_PER_MP_LIMB - 1); /* msb from the product */

      /* If the mantissas of b and c are uniformly distributed in ]1/2, 1],
         then their product is in ]1/4, 1/2] with probability 2*ln(2)-1
         ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
      tmp += k - tn;
      if (MPFR_UNLIKELY (b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1);
      MPFR_ASSERTD (MPFR_LIMB_MSB (tmp[tn-1]) != 0);
      if (MPFR_UNLIKELY (!mpfr_round_p (tmp, tn, p+b1-1,
                                        MPFR_PREC(a)+(rnd_mode==GMP_RNDN))))
        {
          tmp -= k - tn; /* FIXME: tmp may have been reallocated above */
          goto full_multiply;
        }
    }
  else
    {
    full_multiply:
      MPFR_LOG_MSG (("Use mpn_mul\n", 0));
      b1 = mpn_mul (tmp, MPFR_MANT (b), bn, MPFR_MANT (c), cn);

      /* now tmp[0]..tmp[k-1] contains the product of both mantissas, with
         tmp[k-1] >= 2^(BITS_PER_MP_LIMB-2) */
      b1 >>= BITS_PER_MP_LIMB - 1; /* msb from the product */

      /* if the mantissas of b and c are uniformly distributed in ]1/2, 1],
         then their product is in ]1/4, 1/2] with probability 2*ln(2)-1
         ~ 0.386 and in [1/2, 1] with probability 2-2*ln(2) ~ 0.614 */
      tmp += k - tn;
      if (MPFR_UNLIKELY (b1 == 0))
        mpn_lshift (tmp, tmp, tn, 1); /* tn <= k, so no stack corruption */
    }

  ax2 = ax + (mp_exp_t) (b1 - 1);
  MPFR_RNDRAW (inexact, a, tmp, bq+cq, rnd_mode, sign, ax2++);
  MPFR_TMP_FREE (marker);
  MPFR_EXP (a) = ax2; /* Can't use MPFR_SET_EXP: Expo may be out of range */
  MPFR_SET_SIGN (a, sign);
  if (MPFR_UNLIKELY (ax2 > __gmpfr_emax))
    return mpfr_overflow (a, rnd_mode, sign);
  if (MPFR_UNLIKELY (ax2 < __gmpfr_emin))
    {
      /* In round-to-nearest mode, if the exponent of the exact result
         (i.e. before rounding, i.e. without taking cc into account) is
         < __gmpfr_emin - 1, or if the exact result is a power of 2 (i.e.
         if both arguments are powers of 2), then round to zero. */
      if (rnd_mode == GMP_RNDN
          && (ax + (mp_exp_t) b1 < __gmpfr_emin
              || (mpfr_powerof2_raw (b) && mpfr_powerof2_raw (c))))
        rnd_mode = GMP_RNDZ;
      return mpfr_underflow (a, rnd_mode, sign);
    }
  return inexact;
}
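/* Added usage sketch (a hypothetical standalone program, guarded out of
   the library build): it multiplies two numbers at 100-bit precision with
   mpfr_mul and inspects the ternary (inexact) return value.  Only
   documented public MPFR calls are used. */
#if 0
#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t x, y, z;
  int inexact;

  mpfr_init2 (x, 100);
  mpfr_init2 (y, 100);
  mpfr_init2 (z, 100);
  mpfr_set_str (x, "3.1415926535897932384626433832795", 10, GMP_RNDN);
  mpfr_set_ui (y, 2, GMP_RNDN);
  inexact = mpfr_mul (z, x, y, GMP_RNDN); /* z = x * y, rounded to nearest */
  mpfr_out_str (stdout, 10, 0, z, GMP_RNDN);
  printf ("\ninexact = %d\n", inexact);   /* 0 here: multiplying by 2 only
                                             shifts the exponent, so the
                                             product is exact */
  mpfr_clears (x, y, z, NULL);
  return 0;
}
#endif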