diff options
author | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-11-06 17:31:45 +0000 |
---|---|---|
committer | joseph <joseph@7b3dc134-2b1b-0410-93df-9e9f96275f8d> | 2012-11-06 17:31:45 +0000 |
commit | 5c8ae23aecdb14ee22ba06684c488cfe0306ff0e (patch) | |
tree | daf286cd6c5edb7441d779682e09e8dc511e57c9 /libc/sysdeps/ieee754 | |
parent | db0fbac046813774566dfc025932d4e8c0a35640 (diff) | |
download | eglibc2-5c8ae23aecdb14ee22ba06684c488cfe0306ff0e.tar.gz |
Merge changes between r21352 and r21563 from /fsf/trunk.
git-svn-id: svn://svn.eglibc.org/trunk@21564 7b3dc134-2b1b-0410-93df-9e9f96275f8d
Diffstat (limited to 'libc/sysdeps/ieee754')
-rw-r--r-- | libc/sysdeps/ieee754/dbl-64/s_fma.c | 134 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128/s_fmal.c | 137 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c | 3 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fma.c | 18 | ||||
-rw-r--r-- | libc/sysdeps/ieee754/ldbl-96/s_fmal.c | 135 |
5 files changed, 310 insertions, 117 deletions
diff --git a/libc/sysdeps/ieee754/dbl-64/s_fma.c b/libc/sysdeps/ieee754/dbl-64/s_fma.c index 5e21461a4..8c69b987e 100644 --- a/libc/sysdeps/ieee754/dbl-64/s_fma.c +++ b/libc/sysdeps/ieee754/dbl-64/s_fma.c @@ -22,6 +22,7 @@ #include <fenv.h> #include <ieee754.h> #include <math_private.h> +#include <tininess.h> /* This implementation uses rounding to odd to avoid problems with double rounding. See a paper by Boldo and Melquiond: @@ -54,17 +55,46 @@ __fma (double x, double y, double z) underflows to 0. */ if (z == 0 && x != 0 && y != 0) return x * y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of DBL_DENORM_MIN, - compute as x * y + z. */ + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7ff || v.ieee.exponent == 0x7ff || w.ieee.exponent == 0x7ff - || u.ieee.exponent + v.ieee.exponent - > 0x7ff + IEEE754_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent > 0x7ff + IEEE754_DOUBLE_BIAS) + return x * y; + /* If x * y is less than 1/4 of DBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + double tiny = neg ? -0x1p-1074 : 0x1p-1074; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p54 + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 55 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0))) + { + volatile double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-54; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG) { @@ -84,8 +114,17 @@ __fma (double x, double y, double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > DBL_MANT_DIG) u.ieee.exponent -= DBL_MANT_DIG; @@ -115,15 +154,15 @@ __fma (double x, double y, double z) <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * DBL_MANT_DIG; + u.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * DBL_MANT_DIG; - if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4) + v.ieee.exponent += 2 * DBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * DBL_MANT_DIG; + w.ieee.exponent += 2 * DBL_MANT_DIG + 2; else - w.d *= 0x1p106; + w.d *= 0x1p108; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -138,6 +177,9 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + libc_feholdexcept_setround (&env, FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1) double x1 = x * C; @@ -156,9 +198,20 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; double a2 = t1 + t2; + feclearexcept (FE_INEXACT); - fenv_t env; - libc_feholdexcept_setround (&env, FE_TOWARDZERO); + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + libc_feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } + + libc_fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; @@ -194,39 +247,44 @@ __fma (double x, double y, double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-106; + return v.d * 0x1p-108; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 106) - return (a1 + u.d) * 0x1p-106; - /* If v.d * 0x1p-106 with round to zero is a subnormal above - or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa + if (v.ieee.exponent > 108) + return (a1 + u.d) * 0x1p-108; + /* If v.d * 0x1p-108 with round to zero is a subnormal above + or equal to DBL_MIN / 2, then v.d * 0x1p-108 shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-106 never normalizes by shifting up, + v.d * 0x1p-108 never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 106) + if (v.ieee.exponent == 108) { - /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, - v.ieee.mantissa1 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa1 & 3) == 1) + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) { - v.d *= 0x1p-106; - if (v.ieee.negative) - return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */; - else - return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */; + w.d = a1 + u.d; + if (w.ieee.exponent == 109) + return w.d * 0x1p-108; } - else - return v.d * 0x1p-106; + /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, + v.ieee.mantissa1 & 1 is the round bit and j is our sticky + bit. */ + w.d = 0.0; + w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa1 &= ~3U; + v.d *= 0x1p-108; + w.d *= 0x1p-2; + return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-106; + return v.d * 0x1p-108; } } #ifndef __fma diff --git a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c index 46b3d81ce..c9accad8a 100644 --- a/libc/sysdeps/ieee754/ldbl-128/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-128/s_fmal.c @@ -22,6 +22,7 @@ #include <fenv.h> #include <ieee754.h> #include <math_private.h> +#include <tininess.h> /* This implementation uses rounding to odd to avoid problems with double rounding. See a paper by Boldo and Melquiond: @@ -55,17 +56,49 @@ __fmal (long double x, long double y, long double z) underflows to 0. */ if (z == 0 && x != 0 && y != 0) return x * y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff - || u.ieee.exponent + v.ieee.exponent - > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent + > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) + return x * y; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16494L : 0x1p-16494L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p114L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 115 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa3 == 0 + && w.ieee.mantissa2 == 0 + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-114L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { @@ -85,8 +118,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -116,15 +158,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p226L; + w.d *= 0x1p228L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -139,6 +181,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -157,9 +203,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; @@ -195,39 +251,44 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 226) - return (a1 + u.d) * 0x1p-226L; - /* If v.d * 0x1p-226L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-226L shifts mantissa + if (v.ieee.exponent > 228) + return (a1 + u.d) * 0x1p-228L; + /* If v.d * 0x1p-228L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa down just by 1 bit, which means v.ieee.mantissa3 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-226L never normalizes by shifting up, + v.d * 0x1p-228L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 226) + if (v.ieee.exponent == 228) { - /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, - v.ieee.mantissa3 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa3 & 3) == 1) + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) { - v.d *= 0x1p-226L; - if (v.ieee.negative) - return v.d - 0x1p-16494L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16494L /* __LDBL_DENORM_MIN__ */; + w.d = a1 + u.d; + if (w.ieee.exponent == 229) + return w.d * 0x1p-228L; } - else - return v.d * 0x1p-226L; + /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, + v.ieee.mantissa3 & 1 is the round bit and j is our sticky + bit. */ + w.d = 0.0L; + w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa3 &= ~3U; + v.d *= 0x1p-228L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa3 |= j; - return v.d * 0x1p-226L; + return v.d * 0x1p-228L; } } weak_alias (__fmal, fmal) diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c index fe5c8bd8d..3e0535561 100644 --- a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c +++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c @@ -65,7 +65,8 @@ __ieee754_atan2l(long double y, long double x) if(((ix)>0x7ff0000000000000LL)|| ((iy)>0x7ff0000000000000LL)) /* x or y is NaN */ return x+y; - if(((hx-0x3ff0000000000000LL))==0) return __atanl(y); /* x=1.0L */ + if(((hx-0x3ff0000000000000LL))==0 + && (lx&0x7fffffffffffffff)==0) return __atanl(y); /* x=1.0L */ m = ((hy>>63)&1)|((hx>>62)&2); /* 2*sign(x)+sign(y) */ /* when y = 0 */ diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fma.c b/libc/sysdeps/ieee754/ldbl-96/s_fma.c index 001d8063d..bf2d794c9 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fma.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fma.c @@ -42,6 +42,10 @@ __fma (double x, double y, double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1ULL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = (long double) x * C; @@ -60,9 +64,19 @@ __fma (double x, double y, double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ a2 = a2 + m2; diff --git a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c index d12512428..c86dff6f8 100644 --- a/libc/sysdeps/ieee754/ldbl-96/s_fmal.c +++ b/libc/sysdeps/ieee754/ldbl-96/s_fmal.c @@ -22,6 +22,7 @@ #include <fenv.h> #include <ieee754.h> #include <math_private.h> +#include <tininess.h> /* This implementation uses rounding to odd to avoid problems with double rounding. See a paper by Boldo and Melquiond: @@ -55,17 +56,47 @@ __fmal (long double x, long double y, long double z) underflows to 0. */ if (z == 0 && x != 0 && y != 0) return x * y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of LDBL_DENORM_MIN, - compute as x * y + z. */ + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff - || u.ieee.exponent + v.ieee.exponent - > 0x7fff + IEEE854_LONG_DOUBLE_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent + > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) + return x * y; + /* If x * y is less than 1/4 of LDBL_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + long double tiny = neg ? -0x1p-16445L : 0x1p-16445L; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.d = z * 0x1p65L + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 66 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mantissa1 == 0 + && w.ieee.mantissa0 == 0x80000000))) + { + volatile long double force_underflow = x * y; + (void) force_underflow; + } + return v.d * 0x1p-65L; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) { @@ -85,8 +116,17 @@ __fmal (long double x, long double y, long double z) { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + else + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > LDBL_MANT_DIG) u.ieee.exponent -= LDBL_MANT_DIG; @@ -116,15 +156,15 @@ __fmal (long double x, long double y, long double z) <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * LDBL_MANT_DIG; + u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - v.ieee.exponent += 2 * LDBL_MANT_DIG; - if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4) + v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * LDBL_MANT_DIG; + w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; else - w.d *= 0x1p128L; + w.d *= 0x1p130L; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -139,6 +179,10 @@ __fmal (long double x, long double y, long double z) if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) return x * y + z; + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) long double x1 = x * C; @@ -157,9 +201,19 @@ __fmal (long double x, long double y, long double z) t1 = m1 - t1; t2 = z - t2; long double a2 = t1 + t2; + feclearexcept (FE_INEXACT); + + /* If the result is an exact zero, ensure it has the correct + sign. */ + if (a1 == 0 && m2 == 0) + { + feupdateenv (&env); + /* Ensure that round-to-nearest value of z + m1 is not + reused. */ + asm volatile ("" : "=m" (z) : "m" (z)); + return z + m1; + } - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); /* Perform m2 + a2 addition with round to odd. */ u.d = a2 + m2; @@ -195,39 +249,44 @@ __fmal (long double x, long double y, long double z) /* If a1 + u.d is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 128) - return (a1 + u.d) * 0x1p-128L; - /* If v.d * 0x1p-128L with round to zero is a subnormal above - or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa + if (v.ieee.exponent > 130) + return (a1 + u.d) * 0x1p-130L; + /* If v.d * 0x1p-130L with round to zero is a subnormal above + or equal to LDBL_MIN / 2, then v.d * 0x1p-130L shifts mantissa down just by 1 bit, which means v.ieee.mantissa1 |= j would change the round bit, not sticky or guard bit. - v.d * 0x1p-128L never normalizes by shifting up, + v.d * 0x1p-130L never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 128) + if (v.ieee.exponent == 130) { - /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, - v.ieee.mantissa1 & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mantissa1 & 3) == 1) + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) { - v.d *= 0x1p-128L; - if (v.ieee.negative) - return v.d - 0x1p-16445L /* __LDBL_DENORM_MIN__ */; - else - return v.d + 0x1p-16445L /* __LDBL_DENORM_MIN__ */; + w.d = a1 + u.d; + if (w.ieee.exponent == 131) + return w.d * 0x1p-130L; } - else - return v.d * 0x1p-128L; + /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding, + v.ieee.mantissa1 & 1 is the round bit and j is our sticky + bit. */ + w.d = 0.0L; + w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mantissa1 &= ~3U; + v.d *= 0x1p-130L; + w.d *= 0x1p-2L; + return v.d + w.d; } v.ieee.mantissa1 |= j; - return v.d * 0x1p-128L; + return v.d * 0x1p-130L; } } weak_alias (__fmal, fmal) |