Diffstat (limited to 'libgcc/config/libbid/bid128_round_integral.c')
-rw-r--r-- | libgcc/config/libbid/bid128_round_integral.c | 3200
1 file changed, 1622 insertions(+), 1578 deletions(-)
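The hunks below rework the BID128 round-to-integral paths: the coefficient C1 is biased by half an ulp (midpoint64[]/midpoint128[]), multiplied by a 118-bit approximation of 10^(-x) (ten2mk128[]), and the leftover fraction f* decides ties and the inexact flag. As rough orientation only, here is a minimal standalone sketch of that round-half-to-even idea in plain 64-bit integer arithmetic; it is not the library code, and pow10_u64 / round_integral_nearest are hypothetical helpers that assume the digit count to drop fits comfortably in 64 bits.

    /* Illustrative sketch, not libbid: round C * 10^exp (exp < 0) to an
       integer by adding 1/2 * 10^x and dropping x = -exp decimal digits,
       breaking exact ties to even, as the 128-bit code does with its
       midpoint and reciprocal tables.  Assumes 1 <= x <= 19.  */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t pow10_u64 (int x)
    {
      uint64_t p = 1;
      while (x-- > 0)
        p *= 10;
      return p;
    }

    static uint64_t round_integral_nearest (uint64_t C, int exp)
    {
      int x = -exp;                        /* digits to chop off */
      uint64_t kx = pow10_u64 (x);         /* 10^x */
      uint64_t Cstar = (C + kx / 2) / kx;  /* floor ((C + 1/2*10^x) / 10^x) */
      uint64_t f = (C + kx / 2) % kx;      /* remaining fraction f* */
      if (f == 0 && (Cstar & 1))           /* exact midpoint: make it even */
        Cstar--;
      return Cstar;
    }

    int main (void)
    {
      /* 12345 * 10^-2 = 123.45 -> 123;  12350 * 10^-2 = 123.5 -> 124 (even) */
      printf ("%llu\n", (unsigned long long) round_integral_nearest (12345, -2));
      printf ("%llu\n", (unsigned long long) round_integral_nearest (12350, -2));
      return 0;
    }

The real code cannot divide a 128-bit coefficient directly, so it instead multiplies by a precomputed, rounded-up reciprocal of 10^x and detects midpoints and inexactness from the fraction bits, which is what the f* comparisons against ten2mk128[] and onehalf128[] in the diff implement.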
diff --git a/libgcc/config/libbid/bid128_round_integral.c b/libgcc/config/libbid/bid128_round_integral.c index a9952a9bc8b..d517095a770 100644 --- a/libgcc/config/libbid/bid128_round_integral.c +++ b/libgcc/config/libbid/bid128_round_integral.c @@ -34,997 +34,340 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA * BID128_round_integral_exact ****************************************************************************/ -BID128_FUNCTION_ARG1(__bid128_round_integral_exact, x) +BID128_FUNCTION_ARG1 (bid128_round_integral_exact, x) - UINT128 res = {{ 0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull }}; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent + UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} + }; +UINT64 x_sign; +UINT64 x_exp; +int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - UINT256 fstar; - UINT256 P256; +UINT64 tmp64; +BID_UI64DOUBLE tmp1; +unsigned int x_nr_bits; +int q, ind, shift; +UINT128 C1; +UINT256 fstar; +UINT256 P256; // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special + if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) + } else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; } + BID_RETURN (res); } +} // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis 
were eliminated already this test is reduced to - // checking for x10x) +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) || - ((C1.w[1] == 0x0001ed09bead87c0ull) && - (C1.w[0] > 0x378d8e63ffffffffull))) && - ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) || - ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; + } else { // canonical + ; } +} + // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} // x is not special and is not zero - switch (rnd_mode) { - case ROUNDING_TO_NEAREST: - case ROUNDING_TIES_AWAY: - // if (exp <= -(p+1)) return 0.0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_DOWN: - // if (exp <= -p) return -1.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 - if (x_sign) { - // if negative, return negative 1, because we know coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { - // if positive, return positive 0, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_UP: - // if (exp <= -p) return -0.0 or +1.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 0, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { - // if positive, return positive 1, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - 
} - break; - case ROUNDING_TO_ZERO: - // if (exp <= -p) return -0.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - } - - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: +case ROUNDING_TIES_AWAY: + // if (exp <= -(p+1)) return 0.0 + if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // exp < 0 - switch (rnd_mode) { - case ROUNDING_TO_NEAREST: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // determine the value of res and fstar - - // determine inexactness of the rounding of C* - // if (0 < f* - 1/2 < 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 > T*) then - // the result is inexact - // Note: we are going to use __bid_ten2mk128[] instead of __bid_ten2mk128trunc[] - - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - fstar.w[1] = P256.w[1]; - 
fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - ((fstar.w[1] < (__bid_ten2mk128[ind - 1].w[1])) - || ((fstar.w[1] == __bid_ten2mk128[ind - 1].w[1]) - && (fstar.w[0] < __bid_ten2mk128[ind - 1].w[0])))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[1] > 0x8000000000000000ull || - (fstar.w[1] == 0x8000000000000000ull && fstar.w[0] > 0x0ull)) { - // f* > 1/2 and the result may be exact - tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 - if ((tmp64 > __bid_ten2mk128[ind - 1].w[1] - || (tmp64 == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - fstar.w[2] == 0 && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[2] > __bid_one_half128[ind - 1] - || (fstar.w[2] == __bid_one_half128[ind - 1] - && (fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[2] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* < 10^(-x) <=> midpoint - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((res.w[0] & 0x0000000000000001ull) && // is result odd? 
- fstar.w[3] == 0 && fstar.w[2] == 0 - && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { - // subract 1 to make even - if (res.w[0]-- == 0) { - res.w[1]--; - } - } - if (fstar.w[3] > __bid_one_half128[ind - 1] - || (fstar.w[3] == __bid_one_half128[ind - 1] - && (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[3] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_TIES_AWAY: - if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk128trunc[ind], e.g. - // if x=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 
1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) - - // determine also the inexactness of the rounding of C* - // if (0 < f* - 1/2 < 10^(-x)) then - // the result is exact - // else // if (f* - 1/2 > T*) then - // the result is inexact - // Note: we are going to use __bid_ten2mk128[] instead of __bid_ten2mk128trunc[] - // shift right C* by Ex-128 = __bid_shiftright128[ind] - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[1] > 0x8000000000000000ull || - (fstar.w[1] == 0x8000000000000000ull && fstar.w[0] > 0x0ull)) { - // f* > 1/2 and the result may be exact - tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 - if ((tmp64 > __bid_ten2mk128[ind - 1].w[1] - || (tmp64 == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[2] > __bid_one_half128[ind - 1] - || (fstar.w[2] == __bid_one_half128[ind - 1] - && (fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[2] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - if (fstar.w[3] > __bid_one_half128[ind - 1] - || (fstar.w[3] == __bid_one_half128[ind - 1] - && (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { - // f2* > 1/2 and the result may be exact - // Calculate f2* - 1/2 - tmp64 = fstar.w[3] - __bid_one_half128[ind - 1]; - if (tmp64 || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } // else the result is exact - } else { // the result is inexact; f2* <= 1/2 - // set the inexact flag - *pfpsf |= INEXACT_EXCEPTION; - } - } - // if the result was a midpoint, it was already rounded away from zero - res.w[1] |= x_sign | 0x3040000000000000ull; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - 
case ROUNDING_DOWN: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - // tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 0 <= shift <= 102 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - if 
(++res.w[0] == 0) { - res.w[1]++; - } - } - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 - if (x_sign) { // negative rounds down to -1.0 - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { // positive rpunds down to +0.0 - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_UP: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - // tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = C1 * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = 
P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - if (++res.w[0] == 0) { - res.w[1]++; - } - } - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 - if (x_sign) { // negative rounds up to -0.0 - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // positive rpunds up to +1.0 - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); - } - break; - case ROUNDING_TO_ZERO: - if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - // (number of digits to be chopped off) - // chop off ind digits from the lower part of C1 - // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate - // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP - // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE - // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - //tmp64 = C1.w[0]; - // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - // } - // if (C1.w[0] < tmp64) C1.w[1]++; - // if carry-out from C1.w[0], increment C1.w[1] - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { - *pfpsf |= INEXACT_EXCEPTION; - } - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[1] = (P256.w[3] >> shift); - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - } - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 
38 - res.w[1] = 0; - res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { - *pfpsf |= INEXACT_EXCEPTION; - } - } - res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; - BID_RETURN (res); - } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; + break; +case ROUNDING_DOWN: + // if (exp <= -p) return -1.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffa000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; - *pfpsf |= INEXACT_EXCEPTION; - BID_RETURN (res); } - break; - } - BID_RETURN (res); -} - -/***************************************************************************** - * BID128_round_integral_nearest_even - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_even, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits - UINT256 fstar; - UINT256 P256; - - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 
0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; + *pfpsf |= INEXACT_EXCEPTION; + BID_RETURN (res); } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + break; +case ROUNDING_UP: + // if (exp <= -p) return -0.0 or +1.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } - res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // x is not special and is not zero - - // if (exp <= -(p+1)) return 0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + break; +case ROUNDING_TO_ZERO: + // if (exp <= -p) return -0.0 or +0.0 + if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } + break; +} + // q = nr. of decimal digits in x // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; the exp will be 0 +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} + // exp < 0 +switch (rnd_mode) { +case ROUNDING_TO_NEAREST: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // chop off ind digits from the lower part of C1 // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits tmp64 = C1.w[0]; if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; } if (C1.w[0] < tmp64) C1.w[1]++; // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); // determine the value of res and fstar + + // determine inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - // redundant shift = __bid_shiftright128[ind - 1]; // shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; // redundant fstar.w[3] = 0; // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be 
compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - ((P256.w[1] < (__bid_ten2mk128[ind - 1].w[1])) - || ((P256.w[1] == __bid_ten2mk128[ind - 1].w[1]) - && (P256.w[0] < __bid_ten2mk128[ind - 1].w[0])))) { + ((fstar.w[1] < (ten2mk128[ind - 1].w[1])) + || ((fstar.w[1] == ten2mk128[ind - 1].w[1]) + && (fstar.w[0] < ten2mk128[ind - 1].w[0])))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if (tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; fstar.w[1] = P256.w[1]; fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] if ((res.w[0] & 0x0000000000000001ull) && // is result odd? - fstar.w[2] == 0 && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; fstar.w[2] = P256.w[2]; fstar.w[1] = P256.w[1]; fstar.w[0] = P256.w[0]; // fraction f* < 10^(-x) <=> midpoint // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] + // 10^(-x) from ten2mk128[] if ((res.w[0] & 0x0000000000000001ull) && // is result odd? 
- fstar.w[3] == 0 && fstar.w[2] == 0 - && (fstar.w[1] < __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] < __bid_ten2mk128[ind - 1].w[0]))) { + fstar.w[3] == 0 && fstar.w[2] == 0 && + (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { // subract 1 to make even if (res.w[0]-- == 0) { res.w[1]--; } } + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } } res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; BID_RETURN (res); @@ -1032,156 +375,139 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_even, x) // the result is +0 or -0 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_negative - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 fstar; - UINT256 P256; + break; +case ROUNDING_TIES_AWAY: + if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 
1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; + // determine also the inexactness of the rounding of C* + // if (0 < f* - 1/2 < 10^(-x)) then + // the result is exact + // else // if (f* - 1/2 > T*) then + // the result is inexact + // Note: we are going to use ten2mk128[] instead of ten2mk128trunc[] + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[1] > 0x8000000000000000ull || + (fstar.w[1] == 0x8000000000000000ull + && fstar.w[0] > 0x0ull)) { + // f* > 1/2 and the result may be exact + tmp64 = fstar.w[1] - 0x8000000000000000ull; // f* - 1/2 + if ((tmp64 > ten2mk128[ind - 1].w[1] || + (tmp64 == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0]))) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[2] > onehalf128[ind - 1] || + (fstar.w[2] == onehalf128[ind - 1] + && (fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[2] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + if (fstar.w[3] > onehalf128[ind - 1] || + (fstar.w[3] == onehalf128[ind - 1] && + (fstar.w[2] || fstar.w[1] || fstar.w[0]))) { + // f2* > 1/2 and the result may be exact + // Calculate f2* - 1/2 + tmp64 = fstar.w[3] - onehalf128[ind - 1]; + if (tmp64 || fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == 
ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; + } // else the result is exact + } else { // the result is inexact; f2* <= 1/2 + // set the inexact flag + *pfpsf |= INEXACT_EXCEPTION; } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; } - res.w[0] = 0x0000000000000000ull; + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -1.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 1, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } else { - // if positive, return positive 0, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } + } else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; the exp will be 0 + break; +case ROUNDING_DOWN: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) // chop off ind digits from the lower part of C1 @@ -1189,82 +515,85 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE - //tmp64 = C1.w[0]; + // tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } } } } else if (ind - 1 <= 21) 
{ // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 0 <= shift <= 102 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } } } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - // if positive, the truncated value is already the correct result - if (x_sign) { // if negative - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative if (++res.w[0] == 0) { res.w[1]++; } @@ -1281,155 +610,12 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_negative, x) res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; } + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_positive - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 fstar; - UINT256 P256; - - // check for NaN or 
Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -0.0 or +1.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - if (x_sign) { - // if negative, return negative 0, because we know the coefficient - // is non-zero (would have been caught above) - res.w[1] = 0xb040000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { - // if positive, return positive 1, because we know coefficient is - // non-zero (would have been caught above) - res.w[1] = 0x3040000000000000ull; - res.w[0] = 0x0000000000000001ull; - } - BID_RETURN (res); - } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + break; +case ROUNDING_UP: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) @@ -1440,80 +626,83 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE // tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = C1 * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - // redundant fstar.w[3] = 0; - // redundant fstar.w[2] = 0; - // redundant fstar.w[1] = P256.w[1]; - // redundant fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if ((P256.w[1] > __bid_ten2mk128[ind - 1].w[1]) - || (P256.w[1] == __bid_ten2mk128[ind - 1].w[1] - && (P256.w[0] >= __bid_ten2mk128[ind - 1].w[0]))) { + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } } } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - // if negative, the truncated 
value is already the correct result - if (!x_sign) { // if positive - // redundant fstar.w[3] = 0; - fstar.w[2] = P256.w[2] & __bid_maskhigh128[ind - 1]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[2] || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } } } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; - // if negative, the truncated value is already the correct result - if (!x_sign) { // if positive - fstar.w[3] = P256.w[3] & __bid_maskhigh128[ind - 1]; - fstar.w[2] = P256.w[2]; - fstar.w[1] = P256.w[1]; - fstar.w[0] = P256.w[0]; - // fraction f* > 10^(-x) <=> inexact - // f* is in the right position to be compared with - // 10^(-x) from __bid_ten2mk128[] - if (fstar.w[3] || fstar.w[2] - || fstar.w[1] > __bid_ten2mk128[ind - 1].w[1] - || (fstar.w[1] == __bid_ten2mk128[ind - 1].w[1] - && fstar.w[0] >= __bid_ten2mk128[ind - 1].w[0])) { + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive if (++res.w[0] == 0) { res.w[1]++; } @@ -1530,146 +719,13 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_positive, x) res.w[1] = 0x3040000000000000ull; res.w[0] = 0x0000000000000001ull; } + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } -} - -/***************************************************************************** - * BID128_round_integral_zero - ****************************************************************************/ - -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_zero, x) - - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent - // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo - // (all are UINT64) - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; - // UINT128 res is C* at first - represents up to 34 decimal digits ~ - // 113 bits - UINT256 P256; - - // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { - // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - 
res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } - BID_RETURN (res); - } - } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; - - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) - - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; - C1.w[1] = 0; - C1.w[0] = 0; - } - // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // x is not special and is not zero - - // if (exp <= -p) return -0.0 or +0.0 - if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 - res.w[1] = x_sign | 0x3040000000000000ull; - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); - } - // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion - x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; - } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q - // need to shift right -exp digits from the coefficient; the exp will be 0 + break; +case ROUNDING_TO_ZERO: + if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' // (number of digits to be chopped off) // chop off ind digits from the lower part of C1 @@ -1679,234 +735,1222 @@ BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_zero, x) // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE //tmp64 = C1.w[0]; // if (ind <= 19) { - // C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; // } else { - // C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - // C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; // } // if (C1.w[0] < tmp64) C1.w[1]++; // if carry-out from C1.w[0], increment C1.w[1] // calculate C* and f* // C* is actually floor(C*) in this case // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] + // shiftright128[] and maskhigh128[] // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] + // kx = 10^(-x) = ten2mk128[ind - 1] // C* = (C1 + 1/2 * 10^x) * 10^(-x) // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 res.w[1] = P256.w[3]; res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + *pfpsf |= INEXACT_EXCEPTION; + } } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 res.w[1] = (P256.w[3] >> shift); res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + shift = 
shiftright128[ind - 1] - 64; // 2 <= shift <= 38 res.w[1] = 0; res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + *pfpsf |= INEXACT_EXCEPTION; + } } res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; BID_RETURN (res); } else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 res.w[1] = x_sign | 0x3040000000000000ull; res.w[0] = 0x0000000000000000ull; + *pfpsf |= INEXACT_EXCEPTION; BID_RETURN (res); } + break; +} + +BID_RETURN (res); } /***************************************************************************** - * BID128_round_integral_nearest_away + * BID128_round_integral_nearest_even ****************************************************************************/ -BID128_FUNCTION_ARG1_NORND(__bid128_round_integral_nearest_away, x) +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_even, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ 113 bits + UINT256 fstar; + UINT256 P256; - UINT128 res; - UINT64 x_sign; - UINT64 x_exp; - int exp; // unbiased exponent + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + 
C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi + || (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // determine the value of res and fstar + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + // redundant shift = shiftright128[ind - 1]; // shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + // if 0 < fstar < 10^(-x), subtract 1 if odd (for rounding to even) + if ((res.w[0] & 0x0000000000000001ull) && // 
is result odd? + ((P256.w[1] < (ten2mk128[ind - 1].w[1])) + || ((P256.w[1] == ten2mk128[ind - 1].w[1]) + && (P256.w[0] < ten2mk128[ind - 1].w[0])))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[2] == 0 && (fstar.w[1] < ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* < 10^(-x) <=> midpoint + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((res.w[0] & 0x0000000000000001ull) && // is result odd? + fstar.w[3] == 0 && fstar.w[2] == 0 + && (fstar.w[1] < ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] < ten2mk128[ind - 1].w[0]))) { + // subract 1 to make even + if (res.w[0]-- == 0) { + res.w[1]--; + } + } + } + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_negative + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_negative, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo // (all are UINT64) - UINT64 tmp64; - BID_UI64DOUBLE tmp1; - unsigned int x_nr_bits; - int q, ind, shift; - UINT128 C1; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; // UINT128 res is C* at first - represents up to 34 decimal digits ~ // 113 bits - // UINT256 fstar; - UINT256 P256; + UINT256 fstar; + UINT256 P256; // check for NaN or Infinity - if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { // x is special - if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN - if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN - // set invalid flag - *pfpsf |= INVALID_EXCEPTION; - // return Quiet (SNaN) - res.w[1] = x.w[1] & 0xfdffffffffffffffull; - res.w[0] = x.w[0]; - } else { // x is QNaN - // return the input QNaN - res.w[1] = x.w[1]; - res.w[0] = x.w[0]; +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + 
x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -p) return -1.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 1, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { + // if positive, return positive 0, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. 
bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } } - BID_RETURN (res); - } else { // x is not a NaN, so it must be infinity - if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf - // return +inf - res.w[1] = 0x7800000000000000ull; - res.w[0] = 0x0000000000000000ull; - } else { // x is -inf - // return -inf - res.w[1] = 0xf800000000000000ull; - res.w[0] = 0x0000000000000000ull; + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 0 <= shift <= 102 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + 
} + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if positive, the truncated value is already the correct result + if (x_sign) { // if negative + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } } - BID_RETURN (res); } } - // unpack x - x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative - x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bit positions - C1.w[1] = x.w[1] & MASK_COEFF; - C1.w[0] = x.w[0]; + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds down to -1.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000001ull; + } else { // positive rpunds down to +0.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_positive + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_positive, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 fstar; + UINT256 P256; - // test for non-canonical values: - // - values whose encoding begins with x00, x01, or x10 and whose - // coefficient is larger than 10^34 -1, or - // - values whose encoding begins with x1100, x1101, x1110 (if NaNs - // and infinitis were eliminated already this test is reduced to - // checking for x10x) + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = 
x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // test for non-canonical values of the argument x - if ((((C1.w[1] > 0x0001ed09bead87c0ull) - || ((C1.w[1] == 0x0001ed09bead87c0ull) - && (C1.w[0] > 0x378d8e63ffffffffull))) - && ((x.w[1] & 0x6000000000000000ull) != 0x6000000000000000ull)) - || ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull)) { - x.w[1] = x.w[1] & 0x8000000000000000ull; // preserve the sign bit - x.w[0] = 0; - x_exp = 0x1820ull << 49; + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 C1.w[1] = 0; C1.w[0] = 0; + } else { // canonical + ; } +} + // test for input equal to zero - if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { - // x is 0 - // return 0 preserving the sign bit and the preferred exponent - // of MAX(Q(x), 0) - if (x_exp <= (0x1820ull << 49)) { - res.w[1] = - (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; - } else { - res.w[1] = x.w[1] & 0xfffe000000000000ull; - } - res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} // x is not special and is not zero - // if (exp <= -(p+1)) return 0.0 - if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 - res.w[1] = x_sign | 0x3040000000000000ull; + // if (exp <= -p) return -0.0 or +1.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + if (x_sign) { + // if negative, return negative 0, because we know the coefficient + // is non-zero (would have been caught above) + res.w[1] = 0xb040000000000000ull; res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); + } else { + // if positive, return positive 1, because we know coefficient is + // non-zero (would have been caught above) + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } + BID_RETURN (res); +} // q = nr. of decimal digits in x - // determine first the nr. of bits in x - if (C1.w[1] == 0) { - if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 - // split the 64-bit value in two 32-bit halves to avoid rounding errors - if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 - tmp1.d = (double) (C1.w[0] >> 32); // exact conversion - x_nr_bits = - 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } else { // x < 2^32 - tmp1.d = (double) (C1.w[0]); // exact conversion - x_nr_bits = - 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); - } - } else { // if x < 2^53 - tmp1.d = (double) C1.w[0]; // exact conversion + // determine first the nr. 
of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion x_nr_bits = 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) - tmp1.d = (double) C1.w[1]; // exact conversion + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion x_nr_bits = - 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + // tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = C1 * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + // redundant fstar.w[2] = 0; + // redundant fstar.w[1] = P256.w[1]; + // redundant fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if ((P256.w[1] > ten2mk128[ind - 1].w[1]) + || (P256.w[1] == ten2mk128[ind - 1].w[1] + && (P256.w[0] >= ten2mk128[ind - 1].w[0]))) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + // if negative, the 
truncated value is already the correct result + if (!x_sign) { // if positive + // redundant fstar.w[3] = 0; + fstar.w[2] = P256.w[2] & maskhigh128[ind - 1]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[2] || fstar.w[1] > ten2mk128[ind - 1].w[1] || + (fstar.w[1] == ten2mk128[ind - 1].w[1] && + fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + // if negative, the truncated value is already the correct result + if (!x_sign) { // if positive + fstar.w[3] = P256.w[3] & maskhigh128[ind - 1]; + fstar.w[2] = P256.w[2]; + fstar.w[1] = P256.w[1]; + fstar.w[0] = P256.w[0]; + // fraction f* > 10^(-x) <=> inexact + // f* is in the right position to be compared with + // 10^(-x) from ten2mk128[] + if (fstar.w[3] || fstar.w[2] + || fstar.w[1] > ten2mk128[ind - 1].w[1] + || (fstar.w[1] == ten2mk128[ind - 1].w[1] + && fstar.w[0] >= ten2mk128[ind - 1].w[0])) { + if (++res.w[0] == 0) { + res.w[1]++; + } + } + } } - q = __bid_nr_digits[x_nr_bits - 1].digits; - if (q == 0) { - q = __bid_nr_digits[x_nr_bits - 1].digits1; - if (C1.w[1] > __bid_nr_digits[x_nr_bits - 1].threshold_hi - || (C1.w[1] == __bid_nr_digits[x_nr_bits - 1].threshold_hi - && C1.w[0] >= __bid_nr_digits[x_nr_bits - 1].threshold_lo)) - q++; + res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 + if (x_sign) { // negative rounds up to -0.0 + res.w[1] = 0xb040000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // positive rpunds up to +1.0 + res.w[1] = 0x3040000000000000ull; + res.w[0] = 0x0000000000000001ull; } - exp = (x_exp >> 49) - 6176; - if (exp >= 0) { // -exp <= 0 - // the argument is an integer already - res.w[1] = x.w[1]; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_zero + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_zero, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] res.w[0] = x.w[0]; - BID_RETURN (res); - } else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q - // need to shift right -exp digits from the coefficient; the exp will be 0 - ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' - 
// chop off ind digits from the lower part of C1 - // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits - tmp64 = C1.w[0]; - if (ind <= 19) { - C1.w[0] = C1.w[0] + __bid_midpoint64[ind - 1]; - } else { - C1.w[0] = C1.w[0] + __bid_midpoint128[ind - 20].w[0]; - C1.w[1] = C1.w[1] + __bid_midpoint128[ind - 20].w[1]; - } - if (C1.w[0] < tmp64) - C1.w[1]++; - // calculate C* and f* - // C* is actually floor(C*) in this case - // C* and f* need shifting and masking, as shown by - // __bid_shiftright128[] and __bid_maskhigh128[] - // 1 <= x <= 34 - // kx = 10^(-x) = __bid_ten2mk128[ind - 1] - // C* = (C1 + 1/2 * 10^x) * 10^(-x) - // the approximation of 10^(-x) was rounded up to 118 bits - __mul_128x128_to_256 (P256, C1, __bid_ten2mk128[ind - 1]); - // the top Ex bits of 10^(-x) are T* = __bid_ten2mk128trunc[ind], e.g. - // if x=1, T*=__bid_ten2mk128trunc[0]=0x19999999999999999999999999999999 - // if (0 < f* < 10^(-x)) then the result is a midpoint - // if floor(C*) is even then C* = floor(C*) - logical right - // shift; C* has p decimal digits, correct by Prop. 1) - // else if floor(C*) is odd C* = floor(C*)-1 (logical right - // shift; C* has p decimal digits, correct by Pr. 1) - // else - // C* = floor(C*) (logical right shift; C has p decimal digits, - // correct by Property 1) - // n = C* * 10^(e+x) + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; - // shift right C* by Ex-128 = __bid_shiftright128[ind] - if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 - res.w[1] = P256.w[3]; - res.w[0] = P256.w[2]; - } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 - shift = __bid_shiftright128[ind - 1]; // 3 <= shift <= 63 - res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); - res.w[1] = (P256.w[3] >> shift); - } else { // 22 <= ind - 1 <= 33 - shift = __bid_shiftright128[ind - 1]; // 2 <= shift <= 38 - res.w[1] = 0; - res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is 
not special and is not zero + + // if (exp <= -p) return -0.0 or +0.0 +if (x_exp <= 0x2ffc000000000000ull) { // 0x2ffc000000000000ull == -34 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); } - // if the result was a midpoint, it was already rounded away from zero - res.w[1] |= x_sign | 0x3040000000000000ull; - BID_RETURN (res); - } else { // if ((q + exp) < 0) <=> q < -exp - // the result is +0 or -0 - res.w[1] = x_sign | 0x3040000000000000ull; + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) > 0) { // exp < 0 and 1 <= -exp < q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // (number of digits to be chopped off) + // chop off ind digits from the lower part of C1 + // FOR ROUND_TO_NEAREST, WE ADD 1/2 ULP(y) then truncate + // FOR ROUND_TO_ZERO, WE DON'T NEED TO ADD 1/2 ULP + // FOR ROUND_TO_POSITIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF POSITIVE + // FOR ROUND_TO_NEGATIVE_INFINITY, WE TRUNCATE, THEN ADD 1 IF NEGATIVE + //tmp64 = C1.w[0]; + // if (ind <= 19) { + // C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + // } else { + // C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + // C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + // } + // if (C1.w[0] < tmp64) C1.w[1]++; + // if carry-out from C1.w[0], increment C1.w[1] + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[1] = (P256.w[3] >> shift); + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1] - 64; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = P256.w[3] >> shift; + } + 
res.w[1] = x_sign | 0x3040000000000000ull | res.w[1]; + BID_RETURN (res); +} else { // if exp < 0 and q + exp <= 0 the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} +} + +/***************************************************************************** + * BID128_round_integral_nearest_away + ****************************************************************************/ + +BID128_FUNCTION_ARG1_NORND (bid128_round_integral_nearest_away, x) + + UINT128 res; + UINT64 x_sign; + UINT64 x_exp; + int exp; // unbiased exponent + // Note: C1.w[1], C1.w[0] represent x_signif_hi, x_signif_lo + // (all are UINT64) + UINT64 tmp64; + BID_UI64DOUBLE tmp1; + unsigned int x_nr_bits; + int q, ind, shift; + UINT128 C1; + // UINT128 res is C* at first - represents up to 34 decimal digits ~ + // 113 bits + // UINT256 fstar; + UINT256 P256; + + // check for NaN or Infinity +if ((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) { + // x is special +if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN + // if x = NaN, then res = Q (x) + // check first for non-canonical NaN payload + if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || + (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && + (x.w[0] > 0x38c15b09ffffffffull))) { + x.w[1] = x.w[1] & 0xffffc00000000000ull; + x.w[0] = 0x0ull; + } + if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN + // set invalid flag + *pfpsf |= INVALID_EXCEPTION; + // return quiet (x) + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] + res.w[0] = x.w[0]; + } else { // x is QNaN + // return x + res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] + res.w[0] = x.w[0]; + } + BID_RETURN (res) +} else { // x is not a NaN, so it must be infinity + if ((x.w[1] & MASK_SIGN) == 0x0ull) { // x is +inf + // return +inf + res.w[1] = 0x7800000000000000ull; + res.w[0] = 0x0000000000000000ull; + } else { // x is -inf + // return -inf + res.w[1] = 0xf800000000000000ull; res.w[0] = 0x0000000000000000ull; - BID_RETURN (res); } + BID_RETURN (res); +} +} + // unpack x +x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative +C1.w[1] = x.w[1] & MASK_COEFF; +C1.w[0] = x.w[0]; + + // check for non-canonical values (treated as zero) +if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 + // non-canonical + x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits + C1.w[1] = 0; // significand high + C1.w[0] = 0; // significand low +} else { // G0_G1 != 11 + x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits + if (C1.w[1] > 0x0001ed09bead87c0ull || + (C1.w[1] == 0x0001ed09bead87c0ull + && C1.w[0] > 0x378d8e63ffffffffull)) { + // x is non-canonical if coefficient is larger than 10^34 -1 + C1.w[1] = 0; + C1.w[0] = 0; + } else { // canonical + ; + } +} + + // test for input equal to zero +if ((C1.w[1] == 0x0ull) && (C1.w[0] == 0x0ull)) { + // x is 0 + // return 0 preserving the sign bit and the preferred exponent + // of MAX(Q(x), 0) + if (x_exp <= (0x1820ull << 49)) { + res.w[1] = (x.w[1] & 0x8000000000000000ull) | 0x3040000000000000ull; + } else { + res.w[1] = x_sign | x_exp; + } + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // x is not special and is not zero + + // if (exp <= -(p+1)) return 0.0 +if (x_exp <= 0x2ffa000000000000ull) { // 0x2ffa000000000000ull == -35 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} + // q = nr. 
of decimal digits in x + // determine first the nr. of bits in x +if (C1.w[1] == 0) { + if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 + // split the 64-bit value in two 32-bit halves to avoid rounding errors + if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 + tmp1.d = (double) (C1.w[0] >> 32); // exact conversion + x_nr_bits = + 33 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } else { // x < 2^32 + tmp1.d = (double) (C1.w[0]); // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } + } else { // if x < 2^53 + tmp1.d = (double) C1.w[0]; // exact conversion + x_nr_bits = + 1 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); + } +} else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) + tmp1.d = (double) C1.w[1]; // exact conversion + x_nr_bits = + 65 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); +} + +q = nr_digits[x_nr_bits - 1].digits; +if (q == 0) { + q = nr_digits[x_nr_bits - 1].digits1; + if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || + (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && + C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) + q++; +} +exp = (x_exp >> 49) - 6176; +if (exp >= 0) { // -exp <= 0 + // the argument is an integer already + res.w[1] = x.w[1]; + res.w[0] = x.w[0]; + BID_RETURN (res); +} else if ((q + exp) >= 0) { // exp < 0 and 1 <= -exp <= q + // need to shift right -exp digits from the coefficient; the exp will be 0 + ind = -exp; // 1 <= ind <= 34; ind is a synonym for 'x' + // chop off ind digits from the lower part of C1 + // C1 = C1 + 1/2 * 10^x where the result C1 fits in 127 bits + tmp64 = C1.w[0]; + if (ind <= 19) { + C1.w[0] = C1.w[0] + midpoint64[ind - 1]; + } else { + C1.w[0] = C1.w[0] + midpoint128[ind - 20].w[0]; + C1.w[1] = C1.w[1] + midpoint128[ind - 20].w[1]; + } + if (C1.w[0] < tmp64) + C1.w[1]++; + // calculate C* and f* + // C* is actually floor(C*) in this case + // C* and f* need shifting and masking, as shown by + // shiftright128[] and maskhigh128[] + // 1 <= x <= 34 + // kx = 10^(-x) = ten2mk128[ind - 1] + // C* = (C1 + 1/2 * 10^x) * 10^(-x) + // the approximation of 10^(-x) was rounded up to 118 bits + __mul_128x128_to_256 (P256, C1, ten2mk128[ind - 1]); + // the top Ex bits of 10^(-x) are T* = ten2mk128trunc[ind], e.g. + // if x=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 + // if (0 < f* < 10^(-x)) then the result is a midpoint + // if floor(C*) is even then C* = floor(C*) - logical right + // shift; C* has p decimal digits, correct by Prop. 1) + // else if floor(C*) is odd C* = floor(C*)-1 (logical right + // shift; C* has p decimal digits, correct by Pr. 
1) + // else + // C* = floor(C*) (logical right shift; C has p decimal digits, + // correct by Property 1) + // n = C* * 10^(e+x) + + // shift right C* by Ex-128 = shiftright128[ind] + if (ind - 1 <= 2) { // 0 <= ind - 1 <= 2 => shift = 0 + res.w[1] = P256.w[3]; + res.w[0] = P256.w[2]; + } else if (ind - 1 <= 21) { // 3 <= ind - 1 <= 21 => 3 <= shift <= 63 + shift = shiftright128[ind - 1]; // 3 <= shift <= 63 + res.w[0] = (P256.w[3] << (64 - shift)) | (P256.w[2] >> shift); + res.w[1] = (P256.w[3] >> shift); + } else { // 22 <= ind - 1 <= 33 + shift = shiftright128[ind - 1]; // 2 <= shift <= 38 + res.w[1] = 0; + res.w[0] = (P256.w[3] >> (shift - 64)); // 2 <= shift - 64 <= 38 + } + // if the result was a midpoint, it was already rounded away from zero + res.w[1] |= x_sign | 0x3040000000000000ull; + BID_RETURN (res); +} else { // if ((q + exp) < 0) <=> q < -exp + // the result is +0 or -0 + res.w[1] = x_sign | 0x3040000000000000ull; + res.w[0] = 0x0000000000000000ull; + BID_RETURN (res); +} } |
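
Editor's note on the rounding core shared by the two functions above: both added hunks first count the decimal digits q of the coefficient, and when 1 <= -exp <= q they chop ind = -exp digits, with nearest-away rounding done by adding 1/2 * 10^ind (midpoint64[] / midpoint128[]) and then truncating via multiplication by a 118-bit approximation of 10^(-ind) (ten2mk128[]) followed by a right shift (shiftright128[]), i.e. C* = (C1 + 1/2 * 10^ind) * 10^(-ind). The following is only a minimal standalone sketch of those two steps for a coefficient that fits in 64 bits, assuming a GCC-style unsigned __int128; it uses __builtin_clzll plus a power-of-ten threshold instead of the file's double-conversion/nr_digits[] lookup, and an exact 128-bit division instead of the reciprocal-multiply tables. The names pow10_64, decimal_digits and chop_digits_nearest_away are illustrative and do not exist in libbid.

/* Illustrative sketch only -- not part of libbid.  Assumes GCC's
   unsigned __int128 and a coefficient with at most 20 decimal digits.  */
#include <stdint.h>
#include <stdio.h>

static const uint64_t pow10_64[20] = {
  1ull, 10ull, 100ull, 1000ull, 10000ull, 100000ull,
  1000000ull, 10000000ull, 100000000ull, 1000000000ull,
  10000000000ull, 100000000000ull, 1000000000000ull,
  10000000000000ull, 100000000000000ull, 1000000000000000ull,
  10000000000000000ull, 100000000000000000ull,
  1000000000000000000ull, 10000000000000000000ull
};

/* Number of decimal digits in a nonzero coefficient c: estimate q from
   the bit count, then correct against a power-of-ten threshold -- the
   same idea as the nr_digits[] lookup applied to x_nr_bits above, just
   without the table.  */
static int
decimal_digits (uint64_t c)
{
  int nbits = 64 - __builtin_clzll (c);   /* 1 <= nbits <= 64 */
  int q = ((nbits - 1) * 77) / 256 + 1;   /* = floor((nbits-1)*log10(2)) + 1 for nbits <= 64 */
  if (q < 20 && c >= pow10_64[q])
    q++;                                  /* threshold correction */
  return q;
}

/* Chop ind decimal digits (1 <= ind <= 19) from c, rounding to nearest
   with ties away from zero: add 1/2 * 10^ind, then truncate.  The real
   code truncates by multiplying with ten2mk128[ind - 1] and shifting by
   shiftright128[ind - 1]; an exact 128-bit division stands in for that
   here.  */
static uint64_t
chop_digits_nearest_away (uint64_t c, int ind)
{
  unsigned __int128 sum = (unsigned __int128) c + pow10_64[ind] / 2;
  return (uint64_t) (sum / pow10_64[ind]);
}

int
main (void)
{
  uint64_t c = 123456ull;   /* think of 123.456, i.e. exp = -3, q = 6 */
  int ind = 3;              /* ind = -exp digits to chop */
  printf ("q = %d\n", decimal_digits (c));                                  /* 6 */
  printf ("round(123.456) = %llu\n",
          (unsigned long long) chop_digits_nearest_away (c, ind));          /* 123 */
  printf ("round(123.500) = %llu\n",
          (unsigned long long) chop_digits_nearest_away (123500ull, ind));  /* 124 */
  return 0;
}

With c = 123456 and exp = -3 this follows the (q + exp) > 0 branch: ind = 3, the midpoint 500 is added, and truncation gives 123, while the exact midpoint 123.500 rounds away from zero to 124, matching the "if the result was a midpoint, it was already rounded away from zero" comment in bid128_round_integral_nearest_away. The libbid code cannot simply divide as the sketch does, since its coefficient and midpoint occupy up to 128 bits, which is why it multiplies by the precomputed 10^(-x) approximation and shifts instead.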