diff options
Diffstat (limited to 'libgcc/config/libbid/bid128_mul.c')
-rw-r--r-- | libgcc/config/libbid/bid128_mul.c | 2088 |
1 files changed, 356 insertions, 1732 deletions
diff --git a/libgcc/config/libbid/bid128_mul.c b/libgcc/config/libbid/bid128_mul.c index f5fe5e86871..5a60aed98be 100644 --- a/libgcc/config/libbid/bid128_mul.c +++ b/libgcc/config/libbid/bid128_mul.c @@ -30,1775 +30,399 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #if DECIMAL_CALL_BY_REFERENCE void -__bid128_mul (UINT128 * pres, UINT128 * px, - UINT128 * - py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM - _EXC_INFO_PARAM) { - UINT128 x = *px, y = *py; - +bid64dq_mul (UINT64 * pres, UINT64 * px, UINT128 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px; #if !DECIMAL_GLOBAL_ROUNDING unsigned int rnd_mode = *prnd_mode; - #endif #else -UINT128 -__bid128_mul (UINT128 x, - UINT128 y _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM - _EXC_INFO_PARAM) { - +UINT64 +bid64dq_mul (UINT64 x, UINT128 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { #endif - UINT128 res; - UINT64 x_sign, y_sign, sign; - UINT64 x_exp, y_exp; - - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) - // Note: C2.w[1], C2.w[0] represent y_signif_hi, y_signif_lo (all are UINT64) - UINT64 tmp64, tmp64A; - BID_UI64DOUBLE tmp1, tmp2; - int x_nr_bits, y_nr_bits; - int q1, q2, ind, shift; - UINT128 C1, C2; - UINT128 Cstar; // C* represents up to 34 decimal digits ~ 113 bits - UINT384 fstar; - int q; - UINT128 P128, R128; // for underflow path - UINT192 P192, R192; // for underflow path - UINT256 C, P256, R256; - UINT384 P384; - UINT512 P512; - int incr_exp = 0; // for underflow path - int incr_exp1 = 0; // for underflow path - int tmp_fpa = 0; // if possible underflow and q>=34, use to undo the rounding - UINT64 C1_hi, C2_hi; - UINT64 C1_lo, C2_lo; - int is_inexact = 0; - int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; - int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; - int is_midpoint_lt_even1 = 0, is_midpoint_gt_even1 = 0; - int is_inexact_lt_midpoint1 = 0, is_inexact_gt_midpoint1 = 0; - int is_overflow = 0; - int no_underflow = 0; + UINT64 res = 0xbaddbaddbaddbaddull; + UINT128 x1; - // unpack the arguments - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1_hi = x.w[1] & MASK_COEFF; - C1_lo = x.w[0]; - - // unpack y - y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C2_hi = y.w[1] & MASK_COEFF; - C2_lo = y.w[0]; - sign = x_sign ^ y_sign; - - // check for NaN or Infinity - if (((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) - || ((y.w[1] & MASK_SPECIAL) == MASK_SPECIAL)) { +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qq_mul (&res, &x1, py + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid64qq_mul (x1, y + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // x is special or y is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return quiet (x) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - } - // return x - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN - if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qd_mul (UINT64 * pres, UINT128 * px, UINT64 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qd_mul (UINT128 x, UINT64 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT64 res = 0xbaddbaddbaddbaddull; + UINT128 y1; - // return quiet (y) - res.w[1] = y.w[1] & 0xfdffffffffffffffull; - res.w[0] = y.w[0]; - } else { // y is QNaN - // return y - res.w[1] = y.w[1]; - res.w[0] = y.w[0]; - } - BID_RETURN (res); - } else { // neither x nor y is NaN; at least one is infinity - if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x is infinity - if (((y.w[1] & MASK_ANY_INF) == MASK_INF) || (C2_hi != 0x0ull) - || (C2_lo != 0x0ull)) { +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64qq_mul (&res, px, &y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid64qq_mul (x, y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // y is infinity OR y is finite - // if same sign, return +inf otherwise return -inf - if (!sign) { - res.w[1] = 0x7800000000000000ull; // +inf - res.w[0] = 0x0000000000000000ull; - } else { // x and y are infinities of opposite signs - res.w[1] = 0xf800000000000000ull; // -inf - res.w[0] = 0x0000000000000000ull; - } - } else { // if y is 0 - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return QNaN Indefinite - res.w[1] = 0x7c00000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - } else { // x is not NaN or infinity, so y must be infinity - if ((C1_hi != 0x0ull) || (C1_lo != 0x0ull)) { +#if DECIMAL_CALL_BY_REFERENCE +void +bid64qq_mul (UINT64 * pres, UINT128 * px, UINT128 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT128 x = *px, y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT64 +bid64qq_mul (UINT128 x, UINT128 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif - // x is finite - // if same sign, return +inf otherwise return -inf - if (!sign) { - res.w[1] = 0x7800000000000000ull; // +inf - res.w[0] = 0x0000000000000000ull; - } else { // y and x are of opposite signs - res.w[1] = 0xf800000000000000ull; // -inf - res.w[0] = 0x0000000000000000ull; - } - } else { // if x is 0 - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; + UINT128 z = { {0x0000000000000000ull, 0x5ffe000000000000ull} + }; + UINT64 res = 0xbaddbaddbaddbaddull; + UINT64 x_sign, y_sign, p_sign; + UINT64 x_exp, y_exp, p_exp; + int true_p_exp; + UINT128 C1, C2; - // return QNaN Indefinite - res.w[1] = 0x7c00000000000000ull; - res.w[0] = 0x0000000000000000ull; - } + BID_SWAP128 (z); + // skip cases where at least one operand is NaN or infinity + if (!(((x.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || + ((y.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || + ((x.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF) || + ((y.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF))) { + // x, y are 0 or f but not inf or NaN => unpack the arguments and check + // for non-canonical values + + x_sign = x.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C1.w[1] = x.w[HIGH_128W] & MASK_COEFF; + C1.w[0] = x.w[LOW_128W]; + // check for non-canonical values - treated as zero + if ((x.w[HIGH_128W] & 0x6000000000000000ull) == + 0x6000000000000000ull) { + // G0_G1=11 => non-canonical + x_exp = (x.w[HIGH_128W] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low + } else { // G0_G1 != 11 + x_exp = x.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull && + C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; } - BID_RETURN (res); - } - } - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1_hi > 0x0001ed09bead87c0ull) || - ((C1_hi == 0x0001ed09bead87c0ull) && (C1_lo > 0x378d8e63ffffffffull))) && - ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) || - ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - // check for the case where the exponent is shifted right by 2 bits! - if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { - x_exp = (x.w[1] << 2) & MASK_EXP; // same position as for G[0]G[1] != 11 } - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - C1_hi = 0; - C1_lo = 0; - } - // test for non-canonical values of the argument y - if ((((C2_hi > 0x0001ed09bead87c0ull) - || ((C2_hi == 0x0001ed09bead87c0ull) - && (C2_lo > 0x378d8e63ffffffffull))) - && ((y.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - - // check for the case where the exponent is shifted right by 2 bits! - if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { - y_exp = (y.w[1] << 2) & MASK_EXP; // same position as for G[0]G[1] != 11 - } - y.w[1] = y.w[1] & 0x8000000000000000ull; // preserve the sign bit - y.w[0] = 0; - C2_hi = 0; - C2_lo = 0; - } - if (((C1_hi == 0x0ull) && (C1_lo == 0x0ull)) || ((C2_hi == 0x0ull) - && (C2_lo == 0x0ull))) { - - // x is 0 and y is not special OR y is 0 and x is not special - // if same sign, return +0 otherwise return -0 - ind = (x_exp >> 49) + (y_exp >> 49) - 6176; - if (ind < 0) - ind = 0; - if (ind > 0x2fff) - ind = 0x2fff; // 6111 + 6176 - if ((x.w[1] & MASK_SIGN) == (y.w[1] & MASK_SIGN)) { - res.w[1] = 0x0000000000000000ull | ((UINT64) ind << 49); // +0.0 - res.w[0] = 0x0000000000000000ull; - } else { // opposite signs - res.w[1] = 0x8000000000000000ull | ((UINT64) ind << 49); // -0.0 - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } else { // x and y are not special and are not zero - // unpack x - C1.w[1] = C1_hi; - C1.w[0] = C1_lo; - - // q1 = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - }} else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - }} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } q1 = __bid_nr_digits[x_nr_bits - 1].digits; - if (q1 == 0) { - q1 = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q1++; - } - C2.w[1] = C2_hi; - C2.w[0] = C2_lo; - if (C2.w[1] == 0) { - if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 - tmp2.d = (double) (C2.w[0] >> 32); // exact conversion - y_nr_bits = - 32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // y < 2^32 - tmp2.d = (double) C2.w[0]; // exact conversion - y_nr_bits = - ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); - }} else { // if y < 2^53 - tmp2.d = (double) C2.w[0]; // exact conversion - y_nr_bits = - ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); - }} else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) - tmp2.d = (double) C2.w[1]; // exact conversion - y_nr_bits = - 64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); - } q2 = __bid_nr_digits[y_nr_bits].digits; - if (q2 == 0) { - q2 = __bid_nr_digits[y_nr_bits].digits1; - if (C2.w[1] > __bid_nr_digits[y_nr_bits].threshold_hi - || (C2.w[1] == __bid_nr_digits[y_nr_bits].threshold_hi - && C2.w[0] >= __bid_nr_digits[y_nr_bits].threshold_lo)) - q2++; - } - // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits - // where 2 <= q1 + q2 <= 68 - // calculate C' = C1 * C2 and determine q - C.w[3] = C.w[2] = C.w[1] = C.w[0] = 0; - if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C' = C1 * C2 fits in 64 bits - C.w[0] = C1.w[0] * C2.w[0]; - - // if C' < 10^(q1+q2-1) then q = q1 + q2 - 1 else q = q1 + q2 - if (C.w[0] < __bid_ten2k64[q1 + q2 - 1]) - q = q1 + q2 - 1; // q in [1, 18] - else - q = q1 + q2; // q in [2, 19] - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; - } else if (q1 + q2 == 20) { // C' = C1 * C2 fits in 64 or 128 bits - // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits - __mul_64x64_to_128MACH (C, C1.w[0], C2.w[0]); - - // if C' < 10^(q1+q2-1) = 10^19 then q = q1+q2-1 = 19 else q = q1+q2 = 20 - if (C.w[1] == 0 && C.w[0] < __bid_ten2k64[19]) { // 19 = q1+q2-1 - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; - q = 19; // 19 = q1 + q2 - 1 - } else { - - // if (C.w[1] == 0) - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - q = 20; // 20 = q1 + q2 - } - } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 - // C' = C1 * C2 fits in 64 or 128 bits - // (64 bits possibly, but only when q1 + q2 = 21 and C' has 20 digits) - // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits - if (q1 <= 19) { - __mul_128x64_to_128 (C, C1.w[0], C2); - } else { // q2 <= 19 - __mul_128x64_to_128 (C, C2.w[0], C1); - } - - // if C' < 10^(q1+q2-1) then q = q1 + q2 - 1 else q = q1 + q2 - if (C.w[1] < __bid_ten2k128[q1 + q2 - 21].w[1] - || (C.w[1] == __bid_ten2k128[q1 + q2 - 21].w[1] - && C.w[0] < __bid_ten2k128[q1 + q2 - 21].w[0])) { - - // if (C.w[1] == 0) // q = 20, necessarily - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - q = q1 + q2 - 1; // q in [20, 37] - } else { - - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - q = q1 + q2; // q in [21, 38] - } - } else if (q1 + q2 == 39) { // C' = C1 * C2 fits in 128 or 192 bits - // both C1 and C2 fit in 128 bits (actually in 113 bits) - // may replace this by 128x128_to192 - __mul_128x128_to_256 (C, C1, C2); // C.w[3] is 0 - // if C' < 10^(q1+q2-1) = 10^38 then q = q1+q2-1 = 38 else q = q1+q2 = 39 - if (C.w[2] == 0 && (C.w[1] < __bid_ten2k128[18].w[1] || - (C.w[1] == __bid_ten2k128[18].w[1] && C.w[0] < __bid_ten2k128[18].w[0]))) { - // 18 = 38 - 20 = q1+q2-1 - 20 - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - q = 38; // 38 = q1 + q2 - 1 - } else { - - // if (C.w[2] == 0) - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - q = 39; // 39 = q1 + q2 - } - } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 - // C' = C1 * C2 fits in 128 or 192 bits - // (128 bits possibly, but only when q1 + q2 = 40 and C' has 39 digits) - // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one - // may fit in 64 bits - if (C1.w[1] == 0) { // C1 fits in 64 bits - // __mul_64x128_full (REShi64, RESlo128, A64, B128) - __mul_64x128_full (C.w[2], C, C1.w[0], C2); - } else if (C2.w[1] == 0) { // C2 fits in 64 bits - // __mul_64x128_full (REShi64, RESlo128, A64, B128) - __mul_64x128_full (C.w[2], C, C2.w[0], C1); - } else { // both C1 and C2 require 128 bits - // may use __mul_128x128_to_192 (C.w[2], C.w[0], C2.w[0], C1); - __mul_128x128_to_256 (C, C1, C2); // C.w[3] = 0 - } - - // if C' < 10^(q1+q2-1) then q = q1 + q2 - 1 else q = q1 + q2 - if (C.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] - || (C.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] - && (C.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] - || (C.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] - && C.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))) { - - // if (C.w[2] == 0) // q = 39, necessarily - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - q = q1 + q2 - 1; // q in [39, 56] - } else { - - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - q = q1 + q2; // q in [40, 57] - } - } else if (q1 + q2 == 58) { // C' = C1 * C2 fits in 192 or 256 bits - // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one - // may fit in 64 bits - if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits - __mul_64x128_full (C.w[2], C, C1.w[0], C2); // may use 64x128_to_192 - } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits - __mul_64x128_full (C.w[2], C, C2.w[0], C1); // may use 64x128_to_192 - } else { // C1 * C2 will fit in 192 bits or in 256 bits - __mul_128x128_to_256 (C, C1, C2); - } - - // if C' < 10^(q1+q2-1) = 10^57 then q = q1+q2-1 = 57 else q = q1+q2 = 58 - if (C.w[3] == 0 && (C.w[2] < __bid_ten2k256[18].w[2] || - (C.w[2] == __bid_ten2k256[18].w[2] && (C.w[1] < __bid_ten2k256[18].w[1] || - (C.w[1] == __bid_ten2k256[18].w[1] && C.w[0] < __bid_ten2k256[18].w[0]))))) { - // 18 = 57 - 39 = q1+q2-1 - 39 - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - q = 57; // 57 = q1 + q2 - 1 - } else { - - // if (C.w[3] == 0) - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; - q = 58; // 58 = q1 + q2 - } - } else { // if 59 <= q1 + q2 <= 68 - // C' = C1 * C2 fits in 192 or 256 bits - // (192 bits possibly, but only when q1 + q2 = 59 and C' has 58 digits) - // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in - // 64 bits - // may use __mul_128x128_to_192 (C.w[2], C.w[0], C2.w[0], C1); - __mul_128x128_to_256 (C, C1, C2); // C.w[3] = 0 - // if C' < 10^(q1+q2-1) then q = q1 + q2 - 1 else q = q1 + q2 - if (C.w[3] < __bid_ten2k256[q1 + q2 - 40].w[3] - || (C.w[3] == __bid_ten2k256[q1 + q2 - 40].w[3] - && (C.w[2] < __bid_ten2k256[q1 + q2 - 40].w[2] - || (C.w[2] == __bid_ten2k256[q1 + q2 - 40].w[2] - && (C.w[1] < __bid_ten2k256[q1 + q2 - 40].w[1] - || (C.w[1] == __bid_ten2k256[q1 + q2 - 40].w[1] - && C.w[0] < __bid_ten2k256[q1 + q2 - 40].w[0])))))) { - - // if (C.w[3] == 0) // q = 58, necessarily - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; - // else - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; - q = q1 + q2 - 1; // q in [58, 67] - } else { - - // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; - q = q1 + q2; // q in [59, 68] + y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C2.w[1] = y.w[HIGH_128W] & MASK_COEFF; + C2.w[0] = y.w[LOW_128W]; + // check for non-canonical values - treated as zero + if ((y.w[HIGH_128W] & 0x6000000000000000ull) == + 0x6000000000000000ull) { + // G0_G1=11 => non-canonical + y_exp = (y.w[HIGH_128W] << 2) & MASK_EXP; // biased and shifted left 49 bits + C2.w[1] = 0; // significand high + C2.w[0] = 0; // significand low + } else { // G0_G1 != 11 + y_exp = y.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits + if (C2.w[1] > 0x0001ed09bead87c0ull || + (C2.w[1] == 0x0001ed09bead87c0ull && + C2.w[0] > 0x378d8e63ffffffffull)) { + // y is non-canonical if coefficient is larger than 10^34 -1 + C2.w[1] = 0; + C2.w[0] = 0; + } else { // canonical + ; } } - if (((UINT64) q << 49) + x_exp + y_exp < - ((UINT64) P34 << 49) + EXP_MIN + BIN_EXP_BIAS) { - - // possible underflow - // q + ex + ey < P34 + EMIN <=> q - P34 < EMIN - ex - ey <=> q - P34 < ind - goto _underflow_path; - } - if (q <= 34) { // 2 <= q <= 34 the result is exact, and fits in 113 bits - tmp64 = x_exp + y_exp; - if (tmp64 > EXP_MAX + BIN_EXP_BIAS) { // possible overflow - ind = (tmp64 - EXP_MAX - BIN_EXP_BIAS) >> 49; - if (ind > 34 - q) { // overflow in all rounding modes - // |res| >= 10^p * 10^emax = 10^(p-1) * 10^(emax+1) - // assemble the result - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY) { - res.w[1] = sign | 0x7800000000000000ull; - res.w[0] = 0x0ull; - } else if (rnd_mode == ROUNDING_DOWN) { - if (sign) { // res = -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0ull; - } else { // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; - } - } else if (rnd_mode == ROUNDING_UP) { - if (sign) { // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; - } else { // res = +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0ull; - } - } else { // if (rnd_mode == ROUNDING_TO_ZERO) - // |res| = (10^34 - 1) * 10^6111 = +MAXFP - res.w[1] = sign | 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; - } - - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - - // set the overflow flag - *pfpsf |= OVERFLOW_EXCEPTION; - - // is_overflow = 1; - BID_RETURN (res); - } else { // tmp64 > EXP_MAX + BIN_EXP_BIAS but - // ind = ((tmp64-EXP_MAX-BIN_EXP_BIAS)>>49) <= 34 - q - // the exponent will be the maximum exponent - // multiply C by 10^ind; the result fits in 34 digits - if (ind <= 19) { // multiply by __bid_ten2k64[ind] - if (q <= 19) { // 64x64 -> 128 - __mul_64x64_to_128MACH (C, C.w[0], __bid_ten2k64[ind]); - } else { // 128 x 64 -> 128 - // may optimize to multiply 64 x 128 -> 128 - __mul_64x128_full (tmp64, C, __bid_ten2k64[ind], C); - } - } else { // if 20 <= ind <= 32 multiply by __bid_ten2k128[ind - 20] - // it must be that C.w[1] = 0, as C < 10^14 - // may optimize to multiply 64 x 128 -> 128 - __mul_64x128_full (tmp64, C, C.w[0], __bid_ten2k128[ind - 20]); - } - res.w[0] = C.w[0]; - res.w[1] = C.w[1]; - res.w[1] |= EXP_MAX; // EXP MAX - } - } else { - res.w[0] = C.w[0]; - res.w[1] = C.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - res.w[1] |= sign; - } else if (q <= 38) { // 35 <= q <= 38; exact coefficient fits in 128 bits - // C = C + 1/2 * 10^x where the result C fits in 127 bits - ind = q - 35; - tmp64 = C.w[0]; - C.w[0] = C.w[0] + __bid_midpoint64[ind]; - if (C.w[0] < tmp64) - C.w[1]++; - - // x = q - p = q - 34, 1 <= x <= 4 - // kx = 10^(-x) = __bid_ten2mk128M[ind] - // C* = (C + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 128 bits - __mul_128x128_to_256 (P256, C, __bid_ten2mk128M[ind]); - Cstar.w[1] = P256.w[3]; - Cstar.w[0] = P256.w[2]; - fstar.w[2] = Cstar.w[0] & __bid_maskhigh128M[ind]; // fstar.w[3|4|5]=0 - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128M[] and __bid_maskhigh128M[] - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk128truncM[ind], e.g. - // if x=1, T*=__bid_ten2mk128truncM[0]=0xcccccccccccccccccccccccccccccccc - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) - - // shift right C* by Ex-128 = __bid_shiftright128M[ind] - shift = __bid_shiftright128M[ind]; // 3 <= shift <= 13 - Cstar.w[0] = (Cstar.w[0] >> shift) | (Cstar.w[1] << (64 - shift)); - Cstar.w[1] = (Cstar.w[1] >> shift); - - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 > T*) then - // the result is inexact - if (fstar.w[2] > __bid_one_half128M[ind] - || (fstar.w[2] == __bid_one_half128M[ind] - && (fstar.w[1] || fstar.w[0]))) { - - // f* > 1/2 and the result may be exact - // Calculate f* - 1/2 - tmp64 = fstar.w[2] - __bid_one_half128M[ind]; - if (tmp64 || fstar.w[1] > __bid_ten2mk128truncM[ind].w[1] || - (fstar.w[1] == __bid_ten2mk128truncM[ind].w[1] && - fstar.w[0] > __bid_ten2mk128truncM[ind].w[0])) { // f* - 1/2 > 10^(-x) - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - is_inexact_lt_midpoint = 1; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - tmp_fpa = 1; - is_inexact_gt_midpoint = 1; - } - - // check for midpoints (could do this before determining inexactness) - if ((fstar.w[2] == 0) && (fstar.w[1] || fstar.w[0]) - && (fstar.w[1] < __bid_ten2mk128truncM[ind].w[1] - || (fstar.w[1] == __bid_ten2mk128truncM[ind].w[1] - && fstar.w[0] <= __bid_ten2mk128truncM[ind].w[0]))) { - - // the result is a midpoint - if (Cstar.w[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD] - // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 - Cstar.w[0]--; // Cstar.w[0] is now even - if (tmp_fpa == 1) - tmp_fpa = 0; - is_midpoint_gt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } else { // else MP in [ODD, EVEN] - is_midpoint_lt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } - } - // check for rounding overflow - if (Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e6400000000ull) { // if Cstar = 10^34 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 2) << 49); - Cstar.w[1] = 0x0000314dc6448d93ull; // Cstar = 10^33 - Cstar.w[0] = 0x38c15b0a00000000ull; - - // if rounding overflow made the exponent equal to emin, set underflow - if (tmp64 == EXP_MIN + BIN_EXP_BIAS) - *pfpsf |= UNDERFLOW_EXCEPTION; - } else { // 10^33 <= Cstar <= 10^34 - 1 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 1) << 49); // ind+1 = q-34 - } - if (tmp64 >= EXP_MAX + BIN_EXP_BIAS) { // possibble overflow - // exp >= emax for the result rounded to nearest even - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY) { - if (tmp64 > EXP_MAX + BIN_EXP_BIAS) { - - // |res| >= 10^(p-1) * 10^(emax+1) <=> exp >= emax+1 - res.w[1] = sign | 0x7800000000000000ull; // +/-inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_DOWN) { - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0xf800000000000000ull; // -inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_UP) { - if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (!sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0x7800000000000000ull; // inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else { // if (rnd_mode == ROUNDING_TO_ZERO) - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } - if (is_overflow) { // return for overflow - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - - // set the overflow flag - *pfpsf |= OVERFLOW_EXCEPTION; - - // is_overflow = 1; - BID_RETURN (res); - } - } else { - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - res.w[1] |= sign; - } else if (q <= 57) { // 39 <= q <= 57; exact coefficient takes 128-192 bits - // C = C + 1/2 * 10^x where the result C fits in 190 bits - // (10^57 - 1 + 1/2 * 10^23 can be represented with 190 bits) - // x = q - p = q - 34, 5 <= x <= 23 - // kx = 10^(-x) = __bid_ten2mk192M[ind] - // C* = (C + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 192 bits - ind = q - 39; // 0 <= ind <= 18 - tmp64 = C.w[0]; - tmp64A = C.w[1]; - - // Note: - // if 5 <= x <= 19 <=> 0 <= ind <= 14 then - // f* has 256 bits - // else // if 20 <= x <= 23 <=> 15 <= ind <= 18 then - // f* has 320 bits - if (ind <= 14) { // x - 1 = q - 35 = ind + 4 <= 18 - // add one 64-bit word - C.w[0] = C.w[0] + __bid_midpoint64[ind + 4]; - if (C.w[0] < tmp64) - C.w[1]++; - if (C.w[1] < tmp64A) - C.w[2]++; - __mul_192x192_to_384 (P384, C, __bid_ten2mk192M[ind]) - // calculate C* and f*; C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright192M[] and __bid_maskhigh192M[] - // C* has 128 bits; P384.w[5], P384.w[4], P384.w[3] need to be - // shifted right by Ex-192 = __bid_shiftright192M[ind] - shift = __bid_shiftright192M[ind]; // 16 <= shift <= 63 - Cstar.w[0] = (P384.w[3] >> shift) | (P384.w[4] << (64 - shift)); - Cstar.w[1] = (P384.w[4] >> shift) | (P384.w[5] << (64 - shift)); - - // f* has 256 bits - fstar.w[3] = P384.w[3] & __bid_maskhigh192M[ind]; - fstar.w[2] = P384.w[2]; - fstar.w[1] = P384.w[1]; - fstar.w[0] = P384.w[0]; - - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk192truncM[ind], e.g. - // if x=5, T* = __bid_ten2mk192truncM[0] = - // 0xa7c5ac471b4784230fcf80dc33721d53cddd6e04c0592103 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) - - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < T* ~= 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 >= T*) then - // the result is inexact - if (fstar.w[3] > __bid_one_half192M[ind] - || (fstar.w[3] == __bid_one_half192M[ind] - && (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - - // f* > 1/2 and the result may be exact - // Calculate f* - 1/2 - tmp64 = fstar.w[3] - __bid_one_half192M[ind]; - if (tmp64 || fstar.w[2] > __bid_ten2mk192truncM[ind].w[2] || - (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] && - fstar.w[1] > __bid_ten2mk192truncM[ind].w[1]) || - (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] && - fstar.w[1] == __bid_ten2mk192truncM[ind].w[1] && - fstar.w[0] > __bid_ten2mk192truncM[ind].w[0])) { // f* - 1/2 > 10^(-x) - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - is_inexact_lt_midpoint = 1; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - tmp_fpa = 1; - is_inexact_gt_midpoint = 1; - } - - // check for midpoints (could do this before determining inexactness) - if ((fstar.w[3] == 0) - && (fstar.w[2] || fstar.w[1] || fstar.w[0]) - && (fstar.w[2] < __bid_ten2mk192truncM[ind].w[2] - || (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] - && fstar.w[1] < __bid_ten2mk192truncM[ind].w[1]) - || (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] - && fstar.w[1] == __bid_ten2mk192truncM[ind].w[1] - && fstar.w[0] <= __bid_ten2mk192truncM[ind].w[0]))) { - - // the result is a midpoint - if (Cstar.w[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD] - // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 - Cstar.w[0]--; // Cstar.w[0] is now even - if (tmp_fpa == 1) - tmp_fpa = 0; - is_midpoint_gt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } else { // else MP in [ODD, EVEN] - is_midpoint_lt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } - } - } else { // if ind >= 15 <=> x - 1 = q - 35 = ind + 4 >= 19 - // add two 64-bit words - C.w[0] = C.w[0] + __bid_midpoint128[ind - 15].w[0]; - C.w[1] = C.w[1] + __bid_midpoint128[ind - 15].w[1]; - if (C.w[0] < tmp64) - C.w[1]++; - if (C.w[1] < tmp64A) - C.w[2]++; - __mul_192x192_to_384 (P384, C, __bid_ten2mk192M[ind]) - // calculate C* and f*; C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright192M[] and __bid_maskhigh192M[] - // C* has 128 bits; P384.w[5], P384.w[4], need to be - // shifted right by Ex-256 = __bid_shiftright192M[ind] - shift = __bid_shiftright192M[ind]; // 2 <= shift <= 12 - Cstar.w[0] = (P384.w[4] >> shift) | (P384.w[5] << (64 - shift)); - Cstar.w[1] = (P384.w[5] >> shift); - - // f* has 320 bits - fstar.w[4] = P384.w[4] & __bid_maskhigh192M[ind]; - fstar.w[3] = P384.w[3]; - fstar.w[2] = P384.w[2]; - fstar.w[1] = P384.w[1]; - fstar.w[0] = P384.w[0]; - - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk192truncM[ind], e.g. - // if x=23, T* = __bid_ten2mk192truncM[18] = - // 0xc16d9a0095928a2775b7053c0f1782938d6f439b43088650 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) - - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < T* ~= 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 >= T*) then - // the result is inexact - if (fstar.w[4] > __bid_one_half192M[ind] - || (fstar.w[4] == __bid_one_half192M[ind] - && (fstar.w[3] || fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - - // f* > 1/2 and the result may be exact - // Calculate f* - 1/2 - tmp64 = fstar.w[4] - __bid_one_half192M[ind]; - if (tmp64 || fstar.w[3] || fstar.w[2] > __bid_ten2mk192truncM[ind].w[2] || - (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] && - fstar.w[1] > __bid_ten2mk192truncM[ind].w[1]) || - (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] && - fstar.w[1] == __bid_ten2mk192truncM[ind].w[1] && - fstar.w[0] > __bid_ten2mk192truncM[ind].w[0])) { // f* - 1/2 > 10^(-x) - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - is_inexact_lt_midpoint = 1; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - tmp_fpa = 1; - is_inexact_gt_midpoint = 1; - } + p_sign = x_sign ^ y_sign; // sign of the product + + true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; + // true_p_exp, p_exp are used only for 0 * 0, 0 * f, or f * 0 + if (true_p_exp < -398) + p_exp = 0; // cannot be less than EXP_MIN + else if (true_p_exp > 369) + p_exp = (UINT64) (369 + 398) << 53; // cannot be more than EXP_MAX + else + p_exp = (UINT64) (true_p_exp + 398) << 53; + + if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || + (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { + // x = 0 or y = 0 + // the result is 0 + res = p_sign | p_exp; // preferred exponent in [EXP_MIN, EXP_MAX] + BID_RETURN (res) + } // else continue + } + // swap x and y - ensure that a NaN in x has 'higher precedence' than one in y +#if DECIMAL_CALL_BY_REFERENCE + bid64qqq_fma (&res, &y, &x, &z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + res = bid64qqq_fma (y, x, z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // check for midpoints (could do this before determining inexactness) - if ((fstar.w[4] == 0) && (fstar.w[3] == 0) - && (fstar.w[2] || fstar.w[1] || fstar.w[0]) - && (fstar.w[2] < __bid_ten2mk192truncM[ind].w[2] - || (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] - && fstar.w[1] < __bid_ten2mk192truncM[ind].w[1]) - || (fstar.w[2] == __bid_ten2mk192truncM[ind].w[2] - && fstar.w[1] == __bid_ten2mk192truncM[ind].w[1] - && fstar.w[0] <= __bid_ten2mk192truncM[ind].w[0]))) { - // the result is a midpoint - if (Cstar.w[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD] - // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 - Cstar.w[0]--; // Cstar.w[0] is now even - if (tmp_fpa == 1) - tmp_fpa = 0; - is_midpoint_gt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } else { // else MP in [ODD, EVEN] - is_midpoint_lt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } - } - } +#if DECIMAL_CALL_BY_REFERENCE +void +bid128dd_mul (UINT128 * pres, UINT64 * px, UINT64 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px, y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128dd_mul (UINT64 x, UINT64 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; + UINT128 x1, y1; - // check for rounding overflow - if (Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e6400000000ull) { // if Cstar = 10^34 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 6) << 49); - Cstar.w[1] = 0x0000314dc6448d93ull; // Cstar = 10^33 - Cstar.w[0] = 0x38c15b0a00000000ull; - } else { // 10^33 <= Cstar <= 10^34 - 1 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 5) << 49); // ind+5 = q-34 - } - if (tmp64 >= EXP_MAX + BIN_EXP_BIAS) { // possibble overflow - // exp >= emax for the result rounded to nearest even - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY) { - if (tmp64 > EXP_MAX + BIN_EXP_BIAS) { +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_mul (&res, &x1, &y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_mul (x1, y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // |res| >= 10^(p-1) * 10^(emax+1) <=> exp >= emax+1 - res.w[1] = sign | 0x7800000000000000ull; // +/-inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_DOWN) { - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0xf800000000000000ull; // -inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_UP) { - if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (!sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0x7800000000000000ull; // inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else { // if (rnd_mode == ROUNDING_TO_ZERO) - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } - if (is_overflow) { // return for overflow - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - // set the overflow flag - *pfpsf |= OVERFLOW_EXCEPTION; +#if DECIMAL_CALL_BY_REFERENCE +void +bid128dq_mul (UINT128 * pres, UINT64 * px, UINT128 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 x = *px; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128dq_mul (UINT64 x, UINT128 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; + UINT128 x1; - // is_overflow = 1; - BID_RETURN (res)} - } else { - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - res.w[1] |= sign; - } else { // if (58 <= q <= 68) exact coefficient takes 192-226 bits - // C = C + 1/2 * 10^x where the result C fits in 226 bits - // (10^68 - 1 + 1/2 * 10^34 can be represented with 226 bits) - // x = q - p = q - 34, 24 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk256M[ind] - // C* = (C + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 256 bits - ind = q - 58; // 0 <= ind <= 10 - tmp64 = C.w[0]; - tmp64A = C.w[1]; +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_mul (&res, &x1, py + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_mul (x1, y + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // Note: - // f* has 384 bits (more than 320 bits) - // x - 1 = q - 35 = ind + 23 - // add two 64-bit words; e.g. for ind=0 <=> q=58, add 1/2*10^24 - C.w[0] = C.w[0] + __bid_midpoint128[ind + 4].w[0]; - C.w[1] = C.w[1] + __bid_midpoint128[ind + 4].w[1]; - if (C.w[0] < tmp64) - C.w[1]++; - if (C.w[1] < tmp64A) - C.w[2]++; - if (C.w[2] == 0) - C.w[3]++; - __mul_256x256_to_512 (P512, C, __bid_ten2mk256M[ind]) - // calculate C* and f*; C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright256M[] and __bid_maskhigh256M[] - // C* has 128 bits; P512.w[7], P512.w[6], P512.w[5] need to be - // shifted right by Ex-320 = __bid_shiftright256M[ind] - shift = __bid_shiftright256M[ind]; // 15 <= shift <= 48 - if (shift == 32) { - Cstar.w[0] = - ((P512.w[5] >> 31) >> 1) | ((P512.w[6] << 31) << 1); - Cstar.w[1] = - ((P512.w[6] >> 31) >> 1) | ((P512.w[7] << 31) << 1); - } else { - Cstar.w[0] = (P512.w[5] >> shift) | (P512.w[6] << (64 - shift)); - Cstar.w[1] = (P512.w[6] >> shift) | (P512.w[7] << (64 - shift)); - } - // f* has 384 bits - fstar.w[5] = P512.w[5] & __bid_maskhigh256M[ind]; - fstar.w[4] = P512.w[4]; - fstar.w[3] = P512.w[3]; - fstar.w[2] = P512.w[2]; - fstar.w[1] = P512.w[1]; - fstar.w[0] = P512.w[0]; - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk256truncM[ind], e.g. - // if x=24, T* = __bid_ten2mk256truncM[0] = - // 0x9abe14cd44753b52c4926a9672793542d78c3615cf3a050cf23472530ce6e3ec =~ - // 10^(-24) * 2^335 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) +#if DECIMAL_CALL_BY_REFERENCE +void +bid128qd_mul (UINT128 * pres, UINT128 * px, UINT64 * py + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT64 y = *py; +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; +#endif +#else +UINT128 +bid128qd_mul (UINT128 x, UINT64 y + _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { +#endif + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; + UINT128 y1; - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < T* ~= 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 >= T*) then - // the result is inexact - if (fstar.w[5] > __bid_one_half256M[ind] - || (fstar.w[5] == __bid_one_half256M[ind] - && (fstar.w[4] || fstar.w[3] || fstar.w[2] || fstar.w[1] - || fstar.w[0]))) { +#if DECIMAL_CALL_BY_REFERENCE + bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + bid128_mul (&res, px, &y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); + res = bid128_mul (x, y1 + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif + BID_RETURN (res); +} - // f* > 1/2 and the result may be exact - // Calculate f* - 1/2 - tmp64 = fstar.w[5] - __bid_one_half256M[ind]; // tmp64 >= 0 - if (tmp64 || fstar.w[4] || fstar.w[3] > __bid_ten2mk256truncM[ind].w[3] || - (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] && - fstar.w[2] > __bid_ten2mk256truncM[ind].w[2]) || - (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] && - fstar.w[2] == __bid_ten2mk256truncM[ind].w[2] && - fstar.w[1] > __bid_ten2mk256truncM[ind].w[1]) || - (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] && - fstar.w[2] == __bid_ten2mk256truncM[ind].w[2] && - fstar.w[1] == __bid_ten2mk256truncM[ind].w[1] && - fstar.w[0] > __bid_ten2mk256truncM[ind].w[0])) { // f* - 1/2 > 10^(-x) - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - is_inexact_lt_midpoint = 1; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - tmp_fpa = 1; - is_inexact_gt_midpoint = 1; - } - // check for midpoints (could do this before determining inexactness) - if ((fstar.w[5] == 0) && (fstar.w[4] == 0) - && (fstar.w[3] || fstar.w[2] || fstar.w[1] || fstar.w[0]) - && (fstar.w[3] < __bid_ten2mk256truncM[ind].w[3] - || (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] - && fstar.w[2] < __bid_ten2mk256truncM[ind].w[2]) - || (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] - && fstar.w[2] == __bid_ten2mk256truncM[ind].w[2] - && fstar.w[1] < __bid_ten2mk256truncM[ind].w[1]) - || (fstar.w[3] == __bid_ten2mk256truncM[ind].w[3] - && fstar.w[2] == __bid_ten2mk256truncM[ind].w[2] - && fstar.w[1] == __bid_ten2mk256truncM[ind].w[1] - && fstar.w[0] <= __bid_ten2mk256truncM[ind].w[1]))) { +// bid128_mul stands for bid128qq_mul +#if DECIMAL_CALL_BY_REFERENCE +void +bid128_mul (UINT128 * pres, UINT128 * px, + UINT128 * + py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { + UINT128 x = *px, y = *py; - // the result is a midpoint - if (Cstar.w[0] & 0x01) { // Cstar.w[0] is odd; MP in [EVEN, ODD] - // if floor(C*) is odd C = floor(C*) - 1; the result may be 0 - Cstar.w[0]--; // Cstar.w[0] is now even - if (tmp_fpa == 1) - tmp_fpa = 0; - is_midpoint_gt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } else { // else MP in [ODD, EVEN] - is_midpoint_lt_even = 1; - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - } - } - // check for rounding overflow - if (Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e6400000000ull) { // if Cstar = 10^34 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 25) << 49); - Cstar.w[1] = 0x0000314dc6448d93ull; // Cstar = 10^33 - Cstar.w[0] = 0x38c15b0a00000000ull; - } else { // 10^33 <= Cstar <= 10^34 - 1 - tmp64 = x_exp + y_exp + ((UINT64) (ind + 24) << 49); // ind+24 = q-34 - } - if (tmp64 >= EXP_MAX + BIN_EXP_BIAS) { // possibble overflow - // exp >= emax for the result rounded to nearest even - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY) { - if (tmp64 > EXP_MAX + BIN_EXP_BIAS) { +#if !DECIMAL_GLOBAL_ROUNDING + unsigned int rnd_mode = *prnd_mode; - // |res| >= 10^(p-1) * 10^(emax+1) <=> exp >= emax+1 - res.w[1] = sign | 0x7800000000000000ull; // +/-inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_DOWN) { - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0xf800000000000000ull; // -inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else if (rnd_mode == ROUNDING_UP) { - if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (!sign && ((tmp64 > EXP_MAX + BIN_EXP_BIAS) || - ((tmp64 == EXP_MAX + BIN_EXP_BIAS) && - Cstar.w[1] == 0x0001ed09bead87c0ull && - Cstar.w[0] == 0x378d8e63ffffffffull && // (10^34-1) * 10^emax - is_inexact_lt_midpoint))) { - res.w[1] = 0x7800000000000000ull; // inf - res.w[0] = 0x0ull; - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } else { // if (rnd_mode == ROUNDING_TO_ZERO) - if (!sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = +MAXFP - res.w[1] = 0x5fffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // (10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else if (sign && (tmp64 > EXP_MAX + BIN_EXP_BIAS) && - !(tmp64 == EXP_MAX + BIN_EXP_BIAS + EXP_P1 && - Cstar.w[1] == 0x0000314dc6448d93ull && - Cstar.w[0] == 0x38c15b0a00000000ull && // 10^33 * 10^(emax+1) - (is_midpoint_lt_even || is_inexact_gt_midpoint))) { - // res = -MAXFP - res.w[1] = 0xdfffed09bead87c0ull; - res.w[0] = 0x378d8e63ffffffffull; // -(10^34-1) * 10^emax - *pfpsf |= INEXACT_EXCEPTION; // set the inexact flag - *pfpsf |= OVERFLOW_EXCEPTION; // set the overflow flag - is_overflow = 1; - } else { // not overflow - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); - } - } - if (is_overflow) { // return for overflow - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; +#endif +#else +UINT128 +bid128_mul (UINT128 x, + UINT128 y _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM + _EXC_INFO_PARAM) { - // set the overflow flag - *pfpsf |= OVERFLOW_EXCEPTION; +#endif + UINT128 z = { {0x0000000000000000ull, 0x5ffe000000000000ull} + }; + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; + UINT64 x_sign, y_sign, p_sign; + UINT64 x_exp, y_exp, p_exp; + int true_p_exp; + UINT128 C1, C2; - // is_overflow = 1; - BID_RETURN (res); - } - } else { - res.w[0] = Cstar.w[0]; - res.w[1] = Cstar.w[1]; - res.w[1] |= (tmp64 - BIN_EXP_BIAS); + BID_SWAP128 (x); + BID_SWAP128 (y); + // skip cases where at least one operand is NaN or infinity + if (!(((x.w[1] & MASK_NAN) == MASK_NAN) || + ((y.w[1] & MASK_NAN) == MASK_NAN) || + ((x.w[1] & MASK_ANY_INF) == MASK_INF) || + ((y.w[1] & MASK_ANY_INF) == MASK_INF))) { + // x, y are 0 or f but not inf or NaN => unpack the arguments and check + // for non-canonical values + + x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C1.w[1] = x.w[1] & MASK_COEFF; + C1.w[0] = x.w[0]; + // check for non-canonical values - treated as zero + if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { + // G0_G1=11 => non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low + } else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull && + C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; } - res.w[1] |= sign; } - - // general correction from RN to RA, RM, RP, RZ - if (rnd_mode != ROUNDING_TO_NEAREST && !is_overflow) { // overflow is solved - x_exp = res.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1_hi = res.w[1] & MASK_COEFF; - C1_lo = res.w[0]; - if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || - ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && - is_midpoint_gt_even))) || - (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || - ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && - is_midpoint_gt_even)))) { - - // C1 = C1 + 1 - C1_lo = C1_lo + 1; - if (C1_lo == 0) { // rounding overflow in the low 64 bits - C1_hi = C1_hi + 1; - if (C1_hi == 0x0001ed09bead87c0ull - && C1_lo == 0x378d8e6400000000ull) { - - // C1 = 10^34 => rounding overflow - C1_hi = 0x0000314dc6448d93ull; - C1_lo = 0x38c15b0a00000000ull; // 10^33 - x_exp = x_exp + EXP_P1; - } - } - } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) - && ((sign && (rnd_mode == ROUNDING_UP || - rnd_mode == ROUNDING_TO_ZERO)) || - (!sign && (rnd_mode == ROUNDING_DOWN || - rnd_mode == ROUNDING_TO_ZERO)))) { - - // C1 = C1 - 1 - C1_lo = C1_lo - 1; - if (C1_lo == 0xffffffffffffffffull) - C1_hi--; - - // check if we crossed into the lower decade - if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { - // 10^33 - 1 - C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 - C1_lo = 0x378d8e63ffffffffull; - x_exp = x_exp - EXP_P1; // no underflow (TO CHECK) - } - } else { - ; // exact, the result is already correct + y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative + C2.w[1] = y.w[1] & MASK_COEFF; + C2.w[0] = y.w[0]; + // check for non-canonical values - treated as zero + if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { + // G0_G1=11 => non-canonical + y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C2.w[1] = 0; // significand high + C2.w[0] = 0; // significand low + } else { // G0_G1 != 11 + y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C2.w[1] > 0x0001ed09bead87c0ull || + (C2.w[1] == 0x0001ed09bead87c0ull && + C2.w[0] > 0x378d8e63ffffffffull)) { + // y is non-canonical if coefficient is larger than 10^34 -1 + C2.w[1] = 0; + C2.w[0] = 0; + } else { // canonical + ; } - - // assemble the result - res.w[1] = x_exp | C1_hi; - res.w[0] = C1_lo; } - res.w[1] |= sign; - BID_RETURN (res); + p_sign = x_sign ^ y_sign; // sign of the product + + true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; + // true_p_exp, p_exp are used only for 0 * 0, 0 * f, or f * 0 + if (true_p_exp < -6176) + p_exp = 0; // cannot be less than EXP_MIN + else if (true_p_exp > 6111) + p_exp = (UINT64) (6111 + 6176) << 49; // cannot be more than EXP_MAX + else + p_exp = (UINT64) (true_p_exp + 6176) << 49; + + if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || + (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { + // x = 0 or y = 0 + // the result is 0 + res.w[1] = p_sign | p_exp; // preferred exponent in [EXP_MIN, EXP_MAX] + res.w[0] = 0x0; + BID_SWAP128 (res); + BID_RETURN (res) + } // else continue } -_underflow_path: - // got here because q - P34 < ind where ind = EMIN - ex - ey - // q is the number of digits in C; ind is the [positive] exponent of the - // negative power of 10 that must multiply C in order to make the result's - // exponent equal to e_min - P34 + 1 = -6176 - ind = - (int) (((SINT64) EXP_MIN + (SINT64) BIN_EXP_BIAS - (SINT64) x_exp - - (SINT64) y_exp) >> 49); - - // q - P34 < ind => -P34 + 1 < ind => -P34 + 2 <= ind - // ind = EMIN - ex - ey < -6176 + 6176 + 6176 = 6176 - if (q < ind) { // q - ind < 0; result rounds to 0 when rounding to nearest - // set the inexact and underflow flags - *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); - res.w[1] = EXP_MIN; // EXP_MIN = 0x0 - res.w[0] = 0x0; - if (rnd_mode != ROUNDING_TO_NEAREST) { - if ((rnd_mode == ROUNDING_DOWN && sign) || - (rnd_mode == ROUNDING_UP && !sign)) - res.w[0] = 0x0000000000000001ull; - } - } else if (q == ind) { // q - ind = 0; result rounds to 0 or +/-1*10^EMIN - // set the inexact and underflow flags - *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); - - // if C <= 5*10^(q-1) then C = 0 else C = 1 - if (q <= 19) { - if (C.w[0] == __bid_midpoint64[q - 1]) { // C = 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST || (rnd_mode == ROUNDING_DOWN - && !sign) || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else if (C.w[0] < __bid_midpoint64[q - 1]) { // C < 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && !sign) - || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else { // C > 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && sign) - || (rnd_mode == ROUNDING_UP && !sign)) { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } - } - } else if (q <= 38) { // 20 <= q <= 38 - // if q <= P34 = 34 the exact result rounded to P34 digits with unbounded - // exponent will have an exponent smaller than e_min; otherwise if - // 35 <= q <= 38, it depends - if (C.w[1] == __bid_midpoint128[q - 20].w[1] && - C.w[0] == __bid_midpoint128[q - 20].w[0]) { // C = 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST || (rnd_mode == ROUNDING_DOWN - && !sign) || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else if (C.w[1] < __bid_midpoint128[q - 20].w[1] || - (C.w[1] == __bid_midpoint128[q - 20].w[1] && - C.w[0] < __bid_midpoint128[q - 20].w[0])) { // C < 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && !sign) - || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else { // C > 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && sign) - || (rnd_mode == ROUNDING_UP && !sign)) { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } - } - } else if (q <= 58) { // 39 <= q <= 58 - // Note: for q = 58 C may take 256 bits, so need to test C.w[3] - if (C.w[3] == 0x0 && C.w[2] == __bid_midpoint192[q - 39].w[2] && - C.w[1] == __bid_midpoint192[q - 39].w[1] && - C.w[0] == __bid_midpoint192[q - 39].w[0]) { // C = 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST || (rnd_mode == ROUNDING_DOWN - && !sign) || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else if ((C.w[3] == 0x0 && C.w[2] < __bid_midpoint192[q - 39].w[2]) || - (C.w[3] == 0x0 && C.w[2] == __bid_midpoint192[q - 39].w[2] && - C.w[1] < __bid_midpoint192[q - 39].w[1]) || (C.w[3] == 0x0 && - C.w[2] == __bid_midpoint192[q - 39].w[2] && - C.w[1] == __bid_midpoint192[q - 39].w[1] && - C.w[0] < __bid_midpoint192[q - 39].w[0])) { // C < 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && !sign) - || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else { // C > 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && sign) - || (rnd_mode == ROUNDING_UP && !sign)) { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } - } - } else { // if (q <= 68), i.e. 59 <= q <= 68 - if (C.w[3] == __bid_midpoint256[q - 59].w[3] && - C.w[2] == __bid_midpoint256[q - 59].w[2] && - C.w[1] == __bid_midpoint256[q - 59].w[1] && - C.w[0] == __bid_midpoint256[q - 59].w[0]) { // C = 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST || (rnd_mode == ROUNDING_DOWN - && !sign) || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else if (C.w[3] < __bid_midpoint256[q - 59].w[3] || - (C.w[3] == __bid_midpoint256[q - 59].w[3] && - C.w[2] < __bid_midpoint256[q - 59].w[2]) || - (C.w[3] == __bid_midpoint256[q - 59].w[3] && - C.w[2] == __bid_midpoint256[q - 59].w[2] && - C.w[1] < __bid_midpoint256[q - 59].w[1]) || - (C.w[3] == __bid_midpoint256[q - 59].w[3] && - C.w[2] == __bid_midpoint256[q - 59].w[2] && - C.w[1] == __bid_midpoint256[q - 59].w[1] && - C.w[0] < __bid_midpoint256[q - 59].w[0])) { // C < 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && !sign) - || (rnd_mode == ROUNDING_UP && sign) - || rnd_mode == ROUNDING_TO_ZERO) { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } - } else { // C > 0.5 * 10^emin - if (rnd_mode == ROUNDING_TO_NEAREST - || rnd_mode == ROUNDING_TIES_AWAY - || (rnd_mode == ROUNDING_DOWN && sign) - || (rnd_mode == ROUNDING_UP && !sign)) { - res.w[1] = EXP_MIN; - res.w[0] = 1; - } else { - res.w[1] = EXP_MIN; - res.w[0] = 0; - } - } - } - } else { // if 0 < q - ind < P34 <=> 1 <= q - ind <= P34 - 1 = 33 - // In general -P34 + 2 <= ind <= 6176 => -P34 + 2 <= ind < q => - // -P34 + 2 <= ind <= q - 1 - if (rnd_mode != ROUNDING_TO_NEAREST) { - is_inexact_lt_midpoint = 0; - is_inexact_gt_midpoint = 0; - is_midpoint_lt_even = 0; - is_midpoint_gt_even = 0; - } - if (ind <= 0) { // 0 <= -ind - // the result is exact - res.w[1] = (x_exp + y_exp - BIN_EXP_BIAS) | C.w[1]; - res.w[0] = C.w[0]; - - // because the result is exact the U and I status flags are not set - } else { - - // if ind > 0 <=> 1 <= ind <= q - 1; must remove ind digits - // from C, which may have up to 68 digits; note that q >= ind + 1 >= 2 - // Note: there is no underflow in some cases when the coefficient of - // the result is 10^33 or 10^33 - 1 - if (q <= 18) { // 2 <= q <= 18 - __bid_round64_2_18 (q, ind, C.w[0], &res.w[0], &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); - if (incr_exp) { - - // multiply by 10; this cannot be 10^33 - __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[1]); - res.w[1] |= (UINT64) EXP_MIN; - } else { // underflow - res.w[1] = (UINT64) EXP_MIN; - } - if (is_midpoint_lt_even || is_midpoint_gt_even - || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { - - // set the inexact and underflow flags - *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); - } - } else if (q <= 38) { // 19 <= q <= 38 - P128.w[1] = C.w[1]; - P128.w[0] = C.w[0]; - __bid_round128_19_38 (q, ind, P128, &res, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - if (incr_exp) { - - // multiply by 10 and check is this is 10^33, because in that case - // it is possible that this is not underflow - if (q - ind <= 19) { - __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[1]); - } else { // if 20 <= q - ind - __mul_128x64_to_128 (res, __bid_ten2k64[1], res); - } - if ((q - ind + 1) == P34) { // the result is 10^(P34-1) - // if the result rounded directly to P34 digits is the same, then - // there is no underflow - __bid_round128_19_38 (q, ind - 1, P128, &R128, &incr_exp1, - &is_midpoint_lt_even1, - &is_midpoint_gt_even1, - &is_inexact_lt_midpoint1, - &is_inexact_gt_midpoint1); - if (res.w[1] == R128.w[1] && res.w[0] == R128.w[0]) { - no_underflow = 1; - } - } - // res.w[1] |= (UINT64)EXP_MIN; // redundant - } else { // underflow - // res.w[1] = (UINT64)EXP_MIN | res.w[1]; // redundant - } - if (is_midpoint_lt_even || is_midpoint_gt_even - || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { - - // set the inexact and underflow flags - *pfpsf |= INEXACT_EXCEPTION; - is_inexact = 1; - if (!no_underflow) - *pfpsf |= UNDERFLOW_EXCEPTION; - } - } else if (q <= 57) { // 39 <= q <= 57 - P192.w[2] = C.w[2]; - P192.w[1] = C.w[1]; - P192.w[0] = C.w[0]; - __bid_round192_39_57 (q, ind, P192, &R192, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - if (incr_exp) { - - // multiply by 10 and check is this is 10^33, because in that case - // it is possible that this is not underflow - res.w[1] = R192.w[1]; // res has q - ind digits - res.w[0] = R192.w[0]; - if (q - ind <= 19) { - __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[1]); - } else { // if 20 <= q - ind - __mul_128x64_to_128 (res, __bid_ten2k64[1], res); - } - if ((q - ind + 1) == P34) { // the result is 10^(P34-1) - // if the result rounded directly to P34 digits is the same, then - // there is no underflow - __bid_round192_39_57 (q, ind - 1, P192, &R192, &incr_exp1, - &is_midpoint_lt_even1, - &is_midpoint_gt_even1, - &is_inexact_lt_midpoint1, - &is_inexact_gt_midpoint1); - if (res.w[1] == R192.w[1] && res.w[0] == R192.w[0]) { - no_underflow = 1; - } - } - // res.w[1] |= (UINT64)EXP_MIN; // redundant - } else { // underflow - res.w[1] = (UINT64) EXP_MIN | R192.w[1]; - res.w[0] = R192.w[0]; - } - if (is_midpoint_lt_even || is_midpoint_gt_even - || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { - - // set the inexact and underflow flags - *pfpsf |= INEXACT_EXCEPTION; - is_inexact = 1; - if (!no_underflow) - *pfpsf |= UNDERFLOW_EXCEPTION; - } - } else if (q <= 76) { // 58 <= q <= 76 (actually 58 <= q <= 68) - P256.w[3] = C.w[3]; - P256.w[2] = C.w[2]; - P256.w[1] = C.w[1]; - P256.w[0] = C.w[0]; - __bid_round256_58_76 (q, ind, P256, &R256, &incr_exp, - &is_midpoint_lt_even, &is_midpoint_gt_even, - &is_inexact_lt_midpoint, - &is_inexact_gt_midpoint); - if (incr_exp) { - - // multiply by 10 and check is this is 10^33, because in that case - // it is possible that this is not underflow - res.w[1] = R256.w[1]; // res has q - ind digits - res.w[0] = R256.w[0]; - if (q - ind <= 19) { - __mul_64x64_to_128MACH (res, res.w[0], __bid_ten2k64[1]); - } else { // if 20 <= q - ind - __mul_128x64_to_128 (res, __bid_ten2k64[1], res); - } - if ((q - ind + 1) == P34) { // the result is 10^(P34-1) - // if the result rounded directly to P34 digits is the same, then - // there is no underflow - __bid_round256_58_76 (q, ind - 1, P256, &R256, &incr_exp1, - &is_midpoint_lt_even1, - &is_midpoint_gt_even1, - &is_inexact_lt_midpoint1, - &is_inexact_gt_midpoint1); - if (res.w[1] == R256.w[1] && res.w[0] == R256.w[0]) { - no_underflow = 1; - } - } - // res.w[1] |= (UINT64)EXP_MIN; // redundant - } else { // underflow - res.w[1] = (UINT64) EXP_MIN | R256.w[1]; - res.w[0] = R256.w[0]; - } - if (is_midpoint_lt_even || is_midpoint_gt_even - || is_inexact_lt_midpoint || is_inexact_gt_midpoint) { - - // set the inexact and underflow flags - *pfpsf |= INEXACT_EXCEPTION; - is_inexact = 1; - if (!no_underflow) - *pfpsf |= UNDERFLOW_EXCEPTION; - } - } - } - // general correction from RN to RA, RM, RP, RZ - if (rnd_mode != ROUNDING_TO_NEAREST) { - x_exp = res.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - // this must be e_min - C1_hi = res.w[1] & MASK_COEFF; - C1_lo = res.w[0]; - if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || - ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && - is_midpoint_gt_even))) || - (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || - ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && - is_midpoint_gt_even)))) { - - // C1 = C1 + 1 - C1_lo = C1_lo + 1; - if (C1_lo == 0) { // rounding overflow in the low 64 bits - C1_hi = C1_hi + 1; - if (C1_hi == 0x0001ed09bead87c0ull - && C1_lo == 0x378d8e6400000000ull) { - - // C1 = 10^34 => rounding overflow (not possible) TO CHECK - C1_hi = 0x0000314dc6448d93ull; - C1_lo = 0x38c15b0a00000000ull; // 10^33 - x_exp = x_exp + EXP_P1; // this must be e_min - } - } - } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && - ((sign && - (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || - (!sign && - (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { - - // C1 = C1 - 1 (the exponent is emin already) - C1_lo = C1_lo - 1; - if (C1_lo == 0xffffffffffffffffull) - C1_hi--; - - // cannot cross into the lower decade anymore, but the result can be 0 - } else { - ; // exact, the result is already correct - } - - // no overflow is possible - // assemble the result - res.w[1] = x_exp | C1_hi; - res.w[0] = C1_lo; - - // Now fix the case where the general rounding routine returned a non-tiny - // result, but after the correction for rounding modes other than to - // nearest, the result is less in magnitude than 100...0[34] * 10^(-6176) - // (this is due to the fact that the general rounding routine works only - // with rounding to nearest) - if (is_inexact && (x_exp == EXP_MIN) - && (C1_hi < 0x0000314dc6448d93ull - || (C1_hi == 0x0000314dc6448d93ull - && C1_lo < 0x38c15b0a00000000ull))) { - *pfpsf |= UNDERFLOW_EXCEPTION; - } - } - } - res.w[1] |= sign; + BID_SWAP128 (x); + BID_SWAP128 (y); + BID_SWAP128 (z); + // swap x and y - ensure that a NaN in x has 'higher precedence' than one in y +#if DECIMAL_CALL_BY_REFERENCE + bid128_fma (&res, &y, &x, &z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#else + res = bid128_fma (y, x, z + _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG + _EXC_INFO_ARG); +#endif BID_RETURN (res); } |