/* Fused multiply-add. Copyright (C) 2007, 2009, 2011-2023 Free Software Foundation, Inc. This file is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ /* Written by Bruno Haible , 2011. */ #if ! defined USE_LONG_DOUBLE # include #endif /* Specification. */ #include #include #include #if HAVE_FEGETROUND # include #endif #include "float+.h" #include "integer_length.h" #ifdef USE_LONG_DOUBLE # define FUNC fmal # define DOUBLE long double # define FREXP frexpl # define LDEXP ldexpl # define MIN_EXP LDBL_MIN_EXP # define MANT_BIT LDBL_MANT_BIT # define L_(literal) literal##L #elif ! defined USE_FLOAT # define FUNC fma # define DOUBLE double # define FREXP frexp # define LDEXP ldexp # define MIN_EXP DBL_MIN_EXP # define MANT_BIT DBL_MANT_BIT # define L_(literal) literal #else /* defined USE_FLOAT */ # define FUNC fmaf # define DOUBLE float # define FREXP frexpf # define LDEXP ldexpf # define MIN_EXP FLT_MIN_EXP # define MANT_BIT FLT_MANT_BIT # define L_(literal) literal##f #endif #undef MAX #define MAX(a,b) ((a) > (b) ? (a) : (b)) #undef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) /* MSVC with option -fp:strict refuses to compile constant initializers that contain floating-point operations. Pacify this compiler. */ #if defined _MSC_VER && !defined __clang__ # pragma fenv_access (off) #endif /* Work around GCC 4.2.1 bug on OpenBSD 5.1/SPARC64. */ #if defined __GNUC__ && defined __sparc__ # define VOLATILE volatile #else # define VOLATILE #endif /* It is possible to write an implementation of fused multiply-add with floating-point operations alone. See Sylvie Boldo, Guillaume Melquiond: Emulation of FMA and correctly-rounded sums: proved algorithms using rounding to odd. But is it complicated. Here we take the simpler (and probably slower) approach of doing multi-precision arithmetic. */ /* We use the naming conventions of GNU gmp, but vastly simpler (and slower) algorithms. */ typedef unsigned int mp_limb_t; #define GMP_LIMB_BITS 32 static_assert (sizeof (mp_limb_t) * CHAR_BIT == GMP_LIMB_BITS); typedef unsigned long long mp_twolimb_t; #define GMP_TWOLIMB_BITS 64 static_assert (sizeof (mp_twolimb_t) * CHAR_BIT == GMP_TWOLIMB_BITS); /* Number of limbs needed for a single DOUBLE. */ #define NLIMBS1 ((MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS) /* Number of limbs needed for the accumulator. */ #define NLIMBS3 (3 * NLIMBS1 + 1) /* Assuming 0.5 <= x < 1.0: Convert the mantissa (x * 2^DBL_MANT_BIT) to a sequence of limbs. */ static void decode (DOUBLE x, mp_limb_t limbs[NLIMBS1]) { /* I'm not sure whether it's safe to cast a 'double' value between 2^31 and 2^32 to 'unsigned int', therefore play safe and cast only 'double' values between 0 and 2^31 (to 'unsigned int' or 'int', doesn't matter). So, we split the MANT_BIT bits of x into a number of chunks of at most 31 bits. */ enum { chunk_count = (MANT_BIT - 1) / 31 + 1 }; /* Variables used for storing the bits limb after limb. */ mp_limb_t *p = limbs + NLIMBS1 - 1; mp_limb_t accu = 0; unsigned int bits_needed = MANT_BIT - (NLIMBS1 - 1) * GMP_LIMB_BITS; /* The bits bits_needed-1...0 need to be ORed into the accu. 1 <= bits_needed <= GMP_LIMB_BITS. */ /* Unroll the first 4 loop rounds. */ if (chunk_count > 0) { /* Here we still have MANT_BIT-0*31 bits to extract from x. */ enum { chunk_bits = MIN (31, MANT_BIT - 0 * 31) }; /* > 0, <= 31 */ mp_limb_t d; x *= (mp_limb_t) 1 << chunk_bits; d = (int) x; /* 0 <= d < 2^chunk_bits. */ x -= d; if (!(x >= L_(0.0) && x < L_(1.0))) abort (); if (bits_needed < chunk_bits) { /* store bits_needed bits */ accu |= d >> (chunk_bits - bits_needed); *p = accu; if (p == limbs) goto done; p--; /* hold (chunk_bits - bits_needed) bits */ accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed)); bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed); } else { /* store chunk_bits bits */ accu |= d << (bits_needed - chunk_bits); bits_needed -= chunk_bits; if (bits_needed == 0) { *p = accu; if (p == limbs) goto done; p--; accu = 0; bits_needed = GMP_LIMB_BITS; } } } if (chunk_count > 1) { /* Here we still have MANT_BIT-1*31 bits to extract from x. */ enum { chunk_bits = MIN (31, MAX (MANT_BIT - 1 * 31, 0)) }; /* > 0, <= 31 */ mp_limb_t d; x *= (mp_limb_t) 1 << chunk_bits; d = (int) x; /* 0 <= d < 2^chunk_bits. */ x -= d; if (!(x >= L_(0.0) && x < L_(1.0))) abort (); if (bits_needed < chunk_bits) { /* store bits_needed bits */ accu |= d >> (chunk_bits - bits_needed); *p = accu; if (p == limbs) goto done; p--; /* hold (chunk_bits - bits_needed) bits */ accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed)); bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed); } else { /* store chunk_bits bits */ accu |= d << (bits_needed - chunk_bits); bits_needed -= chunk_bits; if (bits_needed == 0) { *p = accu; if (p == limbs) goto done; p--; accu = 0; bits_needed = GMP_LIMB_BITS; } } } if (chunk_count > 2) { /* Here we still have MANT_BIT-2*31 bits to extract from x. */ enum { chunk_bits = MIN (31, MAX (MANT_BIT - 2 * 31, 0)) }; /* > 0, <= 31 */ mp_limb_t d; x *= (mp_limb_t) 1 << chunk_bits; d = (int) x; /* 0 <= d < 2^chunk_bits. */ x -= d; if (!(x >= L_(0.0) && x < L_(1.0))) abort (); if (bits_needed < chunk_bits) { /* store bits_needed bits */ accu |= d >> (chunk_bits - bits_needed); *p = accu; if (p == limbs) goto done; p--; /* hold (chunk_bits - bits_needed) bits */ accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed)); bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed); } else { /* store chunk_bits bits */ accu |= d << (bits_needed - chunk_bits); bits_needed -= chunk_bits; if (bits_needed == 0) { *p = accu; if (p == limbs) goto done; p--; accu = 0; bits_needed = GMP_LIMB_BITS; } } } if (chunk_count > 3) { /* Here we still have MANT_BIT-3*31 bits to extract from x. */ enum { chunk_bits = MIN (31, MAX (MANT_BIT - 3 * 31, 0)) }; /* > 0, <= 31 */ mp_limb_t d; x *= (mp_limb_t) 1 << chunk_bits; d = (int) x; /* 0 <= d < 2^chunk_bits. */ x -= d; if (!(x >= L_(0.0) && x < L_(1.0))) abort (); if (bits_needed < chunk_bits) { /* store bits_needed bits */ accu |= d >> (chunk_bits - bits_needed); *p = accu; if (p == limbs) goto done; p--; /* hold (chunk_bits - bits_needed) bits */ accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed)); bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed); } else { /* store chunk_bits bits */ accu |= d << (bits_needed - chunk_bits); bits_needed -= chunk_bits; if (bits_needed == 0) { *p = accu; if (p == limbs) goto done; p--; accu = 0; bits_needed = GMP_LIMB_BITS; } } } if (chunk_count > 4) { /* Here we still have MANT_BIT-4*31 bits to extract from x. */ /* Generic loop. */ size_t k; for (k = 4; k < chunk_count; k++) { size_t chunk_bits = MIN (31, MANT_BIT - k * 31); /* > 0, <= 31 */ mp_limb_t d; x *= (mp_limb_t) 1 << chunk_bits; d = (int) x; /* 0 <= d < 2^chunk_bits. */ x -= d; if (!(x >= L_(0.0) && x < L_(1.0))) abort (); if (bits_needed < chunk_bits) { /* store bits_needed bits */ accu |= d >> (chunk_bits - bits_needed); *p = accu; if (p == limbs) goto done; p--; /* hold (chunk_bits - bits_needed) bits */ accu = d << (GMP_LIMB_BITS - (chunk_bits - bits_needed)); bits_needed = GMP_LIMB_BITS - (chunk_bits - bits_needed); } else { /* store chunk_bits bits */ accu |= d << (bits_needed - chunk_bits); bits_needed -= chunk_bits; if (bits_needed == 0) { *p = accu; if (p == limbs) goto done; p--; accu = 0; bits_needed = GMP_LIMB_BITS; } } } } /* We shouldn't get here. */ abort (); done: ; #ifndef USE_LONG_DOUBLE /* On FreeBSD 6.1/x86, 'long double' numbers sometimes have excess precision. */ if (!(x == L_(0.0))) abort (); #endif } /* Multiply two sequences of limbs. */ static void multiply (mp_limb_t xlimbs[NLIMBS1], mp_limb_t ylimbs[NLIMBS1], mp_limb_t prod_limbs[2 * NLIMBS1]) { size_t k, i, j; enum { len1 = NLIMBS1 }; enum { len2 = NLIMBS1 }; for (k = len2; k > 0; ) prod_limbs[--k] = 0; for (i = 0; i < len1; i++) { mp_limb_t digit1 = xlimbs[i]; mp_twolimb_t carry = 0; for (j = 0; j < len2; j++) { mp_limb_t digit2 = ylimbs[j]; carry += (mp_twolimb_t) digit1 * (mp_twolimb_t) digit2; carry += prod_limbs[i + j]; prod_limbs[i + j] = (mp_limb_t) carry; carry = carry >> GMP_LIMB_BITS; } prod_limbs[i + len2] = (mp_limb_t) carry; } } DOUBLE FUNC (DOUBLE x, DOUBLE y, DOUBLE z) { if (isfinite (x) && isfinite (y)) { if (isfinite (z)) { /* x, y, z are all finite. */ if (x == L_(0.0) || y == L_(0.0)) return z; if (z == L_(0.0)) return x * y; /* x, y, z are all non-zero. The result is x * y + z. */ { int e; /* exponent of x * y + z */ int sign; mp_limb_t sum[NLIMBS3]; size_t sum_len; { int xys; /* sign of x * y */ int zs; /* sign of z */ int xye; /* sum of exponents of x and y */ int ze; /* exponent of z */ mp_limb_t summand1[NLIMBS3]; size_t summand1_len; mp_limb_t summand2[NLIMBS3]; size_t summand2_len; { mp_limb_t zlimbs[NLIMBS1]; mp_limb_t xylimbs[2 * NLIMBS1]; { DOUBLE xn; /* normalized part of x */ DOUBLE yn; /* normalized part of y */ DOUBLE zn; /* normalized part of z */ int xe; /* exponent of x */ int ye; /* exponent of y */ mp_limb_t xlimbs[NLIMBS1]; mp_limb_t ylimbs[NLIMBS1]; xys = 0; xn = x; if (x < 0) { xys = 1; xn = - x; } yn = y; if (y < 0) { xys = 1 - xys; yn = - y; } zs = 0; zn = z; if (z < 0) { zs = 1; zn = - z; } /* xn, yn, zn are all positive. The result is (-1)^xys * xn * yn + (-1)^zs * zn. */ xn = FREXP (xn, &xe); yn = FREXP (yn, &ye); zn = FREXP (zn, &ze); xye = xe + ye; /* xn, yn, zn are all < 1.0 and >= 0.5. The result is (-1)^xys * 2^xye * xn * yn + (-1)^zs * 2^ze * zn. */ if (xye < ze - MANT_BIT) { /* 2^xye * xn * yn < 2^xye <= 2^(ze-MANT_BIT-1) */ return z; } if (xye - 2 * MANT_BIT > ze) { /* 2^ze * zn < 2^ze <= 2^(xye-2*MANT_BIT-1). We cannot simply do return x * y; because it would round differently: A round-to-even in the multiplication can be a round-up or round-down here, due to z. So replace z with a value that doesn't require the use of long bignums but that rounds the same way. */ zn = L_(0.5); ze = xye - 2 * MANT_BIT - 1; } /* Convert mantissas of xn, yn, zn to limb sequences: xlimbs = 2^MANT_BIT * xn ylimbs = 2^MANT_BIT * yn zlimbs = 2^MANT_BIT * zn */ decode (xn, xlimbs); decode (yn, ylimbs); decode (zn, zlimbs); /* Multiply the mantissas of xn and yn: xylimbs = xlimbs * ylimbs */ multiply (xlimbs, ylimbs, xylimbs); } /* The result is (-1)^xys * 2^(xye-2*MANT_BIT) * xylimbs + (-1)^zs * 2^(ze-MANT_BIT) * zlimbs. Compute e = min (xye-2*MANT_BIT, ze-MANT_BIT) then summand1 = 2^(xye-2*MANT_BIT-e) * xylimbs summand2 = 2^(ze-MANT_BIT-e) * zlimbs */ e = MIN (xye - 2 * MANT_BIT, ze - MANT_BIT); if (e == xye - 2 * MANT_BIT) { /* Simply copy the limbs of xylimbs. */ size_t i; for (i = 0; i < 2 * NLIMBS1; i++) summand1[i] = xylimbs[i]; summand1_len = 2 * NLIMBS1; } else { size_t ediff = xye - 2 * MANT_BIT - e; /* Left shift the limbs of xylimbs by ediff bits. */ size_t ldiff = ediff / GMP_LIMB_BITS; size_t shift = ediff % GMP_LIMB_BITS; size_t i; for (i = 0; i < ldiff; i++) summand1[i] = 0; if (shift > 0) { mp_limb_t carry = 0; for (i = 0; i < 2 * NLIMBS1; i++) { summand1[ldiff + i] = (xylimbs[i] << shift) | carry; carry = xylimbs[i] >> (GMP_LIMB_BITS - shift); } summand1[ldiff + 2 * NLIMBS1] = carry; summand1_len = ldiff + 2 * NLIMBS1 + 1; } else { for (i = 0; i < 2 * NLIMBS1; i++) summand1[ldiff + i] = xylimbs[i]; summand1_len = ldiff + 2 * NLIMBS1; } /* Estimation of needed array size: ediff = (xye - 2 * MANT_BIT) - (ze - MANT_BIT) <= MANT_BIT + 1 therefore summand1_len = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + 2 * NLIMBS1 <= (MANT_BIT + GMP_LIMB_BITS) / GMP_LIMB_BITS + 2 * NLIMBS1 <= 3 * NLIMBS1 + 1 = NLIMBS3 */ if (!(summand1_len <= NLIMBS3)) abort (); } if (e == ze - MANT_BIT) { /* Simply copy the limbs of zlimbs. */ size_t i; for (i = 0; i < NLIMBS1; i++) summand2[i] = zlimbs[i]; summand2_len = NLIMBS1; } else { size_t ediff = ze - MANT_BIT - e; /* Left shift the limbs of zlimbs by ediff bits. */ size_t ldiff = ediff / GMP_LIMB_BITS; size_t shift = ediff % GMP_LIMB_BITS; size_t i; for (i = 0; i < ldiff; i++) summand2[i] = 0; if (shift > 0) { mp_limb_t carry = 0; for (i = 0; i < NLIMBS1; i++) { summand2[ldiff + i] = (zlimbs[i] << shift) | carry; carry = zlimbs[i] >> (GMP_LIMB_BITS - shift); } summand2[ldiff + NLIMBS1] = carry; summand2_len = ldiff + NLIMBS1 + 1; } else { for (i = 0; i < NLIMBS1; i++) summand2[ldiff + i] = zlimbs[i]; summand2_len = ldiff + NLIMBS1; } /* Estimation of needed array size: ediff = (ze - MANT_BIT) - (xye - 2 * MANT_BIT) <= 2 * MANT_BIT therefore summand2_len = (ediff + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1 <= (2 * MANT_BIT + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS + NLIMBS1 <= 3 * NLIMBS1 + 1 = NLIMBS3 */ if (!(summand2_len <= NLIMBS3)) abort (); } } /* The result is (-1)^xys * 2^e * summand1 + (-1)^zs * 2^e * summand2. */ if (xys == zs) { /* Perform an addition. */ size_t i; mp_limb_t carry; sign = xys; carry = 0; for (i = 0; i < MIN (summand1_len, summand2_len); i++) { mp_limb_t digit1 = summand1[i]; mp_limb_t digit2 = summand2[i]; sum[i] = digit1 + digit2 + carry; carry = (carry ? digit1 >= (mp_limb_t)-1 - digit2 : digit1 > (mp_limb_t)-1 - digit2); } if (summand1_len > summand2_len) for (; i < summand1_len; i++) { mp_limb_t digit1 = summand1[i]; sum[i] = carry + digit1; carry = carry && digit1 == (mp_limb_t)-1; } else for (; i < summand2_len; i++) { mp_limb_t digit2 = summand2[i]; sum[i] = carry + digit2; carry = carry && digit2 == (mp_limb_t)-1; } if (carry) sum[i++] = carry; sum_len = i; } else { /* Perform a subtraction. */ /* Compare summand1 and summand2 by magnitude. */ while (summand1[summand1_len - 1] == 0) summand1_len--; while (summand2[summand2_len - 1] == 0) summand2_len--; if (summand1_len > summand2_len) sign = xys; else if (summand1_len < summand2_len) sign = zs; else { size_t i = summand1_len; for (;;) { --i; if (summand1[i] > summand2[i]) { sign = xys; break; } if (summand1[i] < summand2[i]) { sign = zs; break; } if (i == 0) /* summand1 and summand2 are equal. */ return L_(0.0); } } if (sign == xys) { /* Compute summand1 - summand2. */ size_t i; mp_limb_t carry; carry = 0; for (i = 0; i < summand2_len; i++) { mp_limb_t digit1 = summand1[i]; mp_limb_t digit2 = summand2[i]; sum[i] = digit1 - digit2 - carry; carry = (carry ? digit1 <= digit2 : digit1 < digit2); } for (; i < summand1_len; i++) { mp_limb_t digit1 = summand1[i]; sum[i] = digit1 - carry; carry = carry && digit1 == 0; } if (carry) abort (); sum_len = summand1_len; } else { /* Compute summand2 - summand1. */ size_t i; mp_limb_t carry; carry = 0; for (i = 0; i < summand1_len; i++) { mp_limb_t digit1 = summand1[i]; mp_limb_t digit2 = summand2[i]; sum[i] = digit2 - digit1 - carry; carry = (carry ? digit2 <= digit1 : digit2 < digit1); } for (; i < summand2_len; i++) { mp_limb_t digit2 = summand2[i]; sum[i] = digit2 - carry; carry = carry && digit2 == 0; } if (carry) abort (); sum_len = summand2_len; } } } /* The result is (-1)^sign * 2^e * sum. */ /* Now perform the rounding to MANT_BIT mantissa bits. */ while (sum[sum_len - 1] == 0) sum_len--; /* Here we know that the most significant limb, sum[sum_len - 1], is non-zero. */ { /* How many bits the sum has. */ unsigned int sum_bits = integer_length (sum[sum_len - 1]) + (sum_len - 1) * GMP_LIMB_BITS; /* How many bits to keep when rounding. */ unsigned int keep_bits; /* How many bits to round off. */ unsigned int roundoff_bits; if (e + (int) sum_bits >= MIN_EXP) /* 2^e * sum >= 2^(MIN_EXP-1). result will be a normalized number. */ keep_bits = MANT_BIT; else if (e + (int) sum_bits >= MIN_EXP - MANT_BIT) /* 2^e * sum >= 2^(MIN_EXP-MANT_BIT-1). result will be a denormalized number or rounded to zero. */ keep_bits = e + (int) sum_bits - (MIN_EXP + MANT_BIT); else /* 2^e * sum < 2^(MIN_EXP-MANT_BIT-1). Round to zero. */ return L_(0.0); /* Note: 0 <= keep_bits <= MANT_BIT. */ if (sum_bits <= keep_bits) { /* Nothing to do. */ roundoff_bits = 0; keep_bits = sum_bits; } else { int round_up; roundoff_bits = sum_bits - keep_bits; /* > 0, <= sum_bits */ { #if HAVE_FEGETROUND && defined FE_TOWARDZERO /* Cf. */ int rounding_mode = fegetround (); if (rounding_mode == FE_TOWARDZERO) round_up = 0; # if defined FE_DOWNWARD /* not defined on sh4 */ else if (rounding_mode == FE_DOWNWARD) round_up = sign; # endif # if defined FE_UPWARD /* not defined on sh4 */ else if (rounding_mode == FE_UPWARD) round_up = !sign; # endif #else /* Cf. */ int rounding_mode = FLT_ROUNDS; if (rounding_mode == 0) /* toward zero */ round_up = 0; else if (rounding_mode == 3) /* toward negative infinity */ round_up = sign; else if (rounding_mode == 2) /* toward positive infinity */ round_up = !sign; #endif else { /* Round to nearest. */ round_up = 0; /* Test bit (roundoff_bits-1). */ if ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS] >> ((roundoff_bits - 1) % GMP_LIMB_BITS)) & 1) { /* Test bits roundoff_bits-1 .. 0. */ bool halfway = ((sum[(roundoff_bits - 1) / GMP_LIMB_BITS] & (((mp_limb_t) 1 << ((roundoff_bits - 1) % GMP_LIMB_BITS)) - 1)) == 0); if (halfway) { int i; for (i = (roundoff_bits - 1) / GMP_LIMB_BITS - 1; i >= 0; i--) if (sum[i] != 0) { halfway = false; break; } } if (halfway) /* Round to even. Test bit roundoff_bits. */ round_up = ((sum[roundoff_bits / GMP_LIMB_BITS] >> (roundoff_bits % GMP_LIMB_BITS)) & 1); else /* Round up. */ round_up = 1; } } } /* Perform the rounding. */ { size_t i = roundoff_bits / GMP_LIMB_BITS; { size_t j = i; while (j > 0) sum[--j] = 0; } if (round_up) { /* Round up. */ sum[i] = (sum[i] | (((mp_limb_t) 1 << (roundoff_bits % GMP_LIMB_BITS)) - 1)) + 1; if (sum[i] == 0) { /* Propagate carry. */ while (i < sum_len - 1) { ++i; sum[i] += 1; if (sum[i] != 0) break; } } /* sum[i] is the most significant limb that was incremented. */ if (i == sum_len - 1 && (sum[i] & (sum[i] - 1)) == 0) { /* Through the carry, one more bit is needed. */ if (sum[i] != 0) sum_bits += 1; else { /* Instead of requiring one more limb of memory, perform a shift by one bit, and adjust the exponent. */ sum[i] = (mp_limb_t) 1 << (GMP_LIMB_BITS - 1); e += 1; } /* The bit sequence has the form 1000...000. */ keep_bits = 1; } } else { /* Round down. */ sum[i] &= ((mp_limb_t) -1 << (roundoff_bits % GMP_LIMB_BITS)); if (i == sum_len - 1 && sum[i] == 0) /* The entire sum has become zero. */ return L_(0.0); } } } /* The result is (-1)^sign * 2^e * sum and here we know that 2^(sum_bits-1) <= sum < 2^sum_bits, and sum is a multiple of 2^(sum_bits-keep_bits), where 0 < keep_bits <= MANT_BIT and keep_bits <= sum_bits. (If keep_bits was initially 0, the rounding either returned zero or produced a bit sequence of the form 1000...000, setting keep_bits to 1.) */ { /* Split the keep_bits bits into chunks of at most 32 bits. */ unsigned int chunk_count = (keep_bits - 1) / GMP_LIMB_BITS + 1; /* 1 <= chunk_count <= ceil (sum_bits / GMP_LIMB_BITS) = sum_len. */ static const DOUBLE chunk_multiplier = /* 2^-GMP_LIMB_BITS */ L_(1.0) / ((DOUBLE) (1 << (GMP_LIMB_BITS / 2)) * (DOUBLE) (1 << ((GMP_LIMB_BITS + 1) / 2))); unsigned int shift = sum_bits % GMP_LIMB_BITS; DOUBLE fsum; if (MANT_BIT <= GMP_LIMB_BITS) { /* Since keep_bits <= MANT_BIT <= GMP_LIMB_BITS, chunk_count is 1. No need for a loop. */ if (shift == 0) fsum = (DOUBLE) sum[sum_len - 1]; else fsum = (DOUBLE) ((sum[sum_len - 1] << (GMP_LIMB_BITS - shift)) | (sum_len >= 2 ? sum[sum_len - 2] >> shift : 0)); } else { int k; k = chunk_count - 1; if (shift == 0) { /* First loop round. */ fsum = (DOUBLE) sum[sum_len - k - 1]; /* General loop. */ while (--k >= 0) { fsum *= chunk_multiplier; fsum += (DOUBLE) sum[sum_len - k - 1]; } } else { /* First loop round. */ { VOLATILE mp_limb_t chunk = (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift)) | (sum_len >= k + 2 ? sum[sum_len - k - 2] >> shift : 0); fsum = (DOUBLE) chunk; } /* General loop. */ while (--k >= 0) { fsum *= chunk_multiplier; { VOLATILE mp_limb_t chunk = (sum[sum_len - k - 1] << (GMP_LIMB_BITS - shift)) | (sum[sum_len - k - 2] >> shift); fsum += (DOUBLE) chunk; } } } } fsum = LDEXP (fsum, e + (int) sum_bits - GMP_LIMB_BITS); return (sign ? - fsum : fsum); } } } } else return z; } else return (x * y) + z; }