diff options
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | lib/strtod.c | 419 | ||||
-rw-r--r-- | m4/strtod.m4 | 24 | ||||
-rw-r--r-- | modules/strtod | 4 | ||||
-rw-r--r-- | modules/strtod-tests | 1 |
5 files changed, 257 insertions, 213 deletions
@@ -1,3 +1,25 @@ +2010-07-12 Paul R. Eggert <eggert@cs.ucla.edu> + + strtod: make it more-accurate typically, and don't require libm + * lib/strtod.c (_GL_ARG_NONNULL): Remove; no longer needed. + Include limits.h. Don't include string.h. + (HAVE_LDEXP_IN_LIBC, HAVE_RAW_DECL_STRTOD): Define to 0 if not defined. + (locale_isspace): New function, so that no casts are needed to + check whether *s is a space. + (ldexp): Provide an unused dummy if not available. + (scale_radix_exp, parse_number, underlying_strtod): New functions. + (strtod): Use them. This implementation prefers to use the + underlying strtod if available, falling back on our own code + only to fix known bugs. This is more likely to produce an + accurate result. Also, it avoids the use of libm functions. + * m4/strtod.m4 (gl_FUNC_STRTOD): Don't invoke _AC_LIBOBJ_STRTOD; + no longer needed. Invoke AC_LIBOBJ([strtod]); don't know why this + was absent, but it caused a test failure with coreutils. + (gl_PREREQ_STRTOD): Check wither ldexp can be used without linking + with libm. + * modules/strtod (Makefile.am, Link): libm is no longer needed. + * modules/strtod-tests (Makefile.am): Likewise. + 2010-07-11 Pádraig Brady <P@draigBrady.com> Bruno Haible <bruno@clisp.org> diff --git a/lib/strtod.c b/lib/strtod.c index 371476ee4d..61887598ca 100644 --- a/lib/strtod.c +++ b/lib/strtod.c @@ -16,261 +16,274 @@ #include <config.h> -/* Don't use __attribute__ __nonnull__ in this compilation unit. Otherwise gcc - optimizes away the nptr == NULL test below. */ -#define _GL_ARG_NONNULL(params) - #include <stdlib.h> #include <ctype.h> #include <errno.h> #include <float.h> +#include <limits.h> #include <math.h> #include <stdbool.h> -#include <string.h> #include "c-ctype.h" -/* Convert NPTR to a double. If ENDPTR is not NULL, a pointer to the - character after the last one used in the number is put in *ENDPTR. */ -double -strtod (const char *nptr, char **endptr) +#ifndef HAVE_LDEXP_IN_LIBC +#define HAVE_LDEXP_IN_LIBC 0 +#endif +#ifndef HAVE_RAW_DECL_STRTOD +#define HAVE_RAW_DECL_STRTOD 0 +#endif + +/* Return true if C is a space in the current locale, avoiding + problems with signed char and isspace. */ +static bool +locale_isspace (char c) { - const unsigned char *s; - bool negative = false; + unsigned char uc = c; + return isspace (uc) != 0; +} - /* The number so far. */ - double num; +#if !HAVE_LDEXP_IN_LIBC + #define ldexp dummy_ldexp + static double ldexp (double x, int exponent) { return x + exponent; } +#endif - bool got_dot; /* Found a decimal point. */ - bool got_digit; /* Seen any digits. */ - bool hex = false; /* Look for hex float exponent. */ +/* Return X * BASE**EXPONENT. Return an extreme value and set errno + to ERANGE if underflow or overflow occurs. */ +static double +scale_radix_exp (double x, int radix, long int exponent) +{ + /* If RADIX == 10, this code is neither precise nor fast; it is + merely a straightforward and relatively portable approximation. + If N == 2, this code is precise on a radix-2 implementation, + albeit perhaps not fast if ldexp is not in libc. */ - /* The exponent of the number. */ - long int exponent; + long int e = exponent; - if (nptr == NULL) + if (HAVE_LDEXP_IN_LIBC && radix == 2) + return ldexp (x, e < INT_MIN ? INT_MIN : INT_MAX < e ? INT_MAX : e); + else { - errno = EINVAL; - goto noconv; - } - - /* Use unsigned char for the ctype routines. */ - s = (unsigned char *) nptr; - - /* Eat whitespace. */ - while (isspace (*s)) - ++s; - - /* Get the sign. */ - negative = *s == '-'; - if (*s == '-' || *s == '+') - ++s; - - num = 0.0; - got_dot = false; - got_digit = false; - exponent = 0; + double r = x; - /* Check for hex float. */ - if (*s == '0' && c_tolower (s[1]) == 'x' - && (c_isxdigit (s[2]) || ('.' == s[2] && c_isxdigit (s[3])))) - { - hex = true; - s += 2; - for (;; ++s) + if (r != 0) { - if (c_isxdigit (*s)) + if (e < 0) { - got_digit = true; - - /* Make sure that multiplication by 16 will not overflow. */ - if (num > DBL_MAX / 16) - /* The value of the digit doesn't matter, since we have already - gotten as many digits as can be represented in a `double'. - This doesn't necessarily mean the result will overflow. - The exponent may reduce it to within range. - - We just need to record that there was another - digit so that we can multiply by 16 later. */ - ++exponent; - else - num = ((num * 16.0) - + (c_tolower (*s) - (c_isdigit (*s) ? '0' : 'a' - 10))); - - /* Keep track of the number of digits after the decimal point. - If we just divided by 16 here, we would lose precision. */ - if (got_dot) - --exponent; + while (e++ != 0) + { + r /= radix; + if (r == 0 && x != 0) + { + errno = ERANGE; + break; + } + } } - else if (!got_dot && *s == '.') - /* Record that we have found the decimal point. */ - got_dot = true; else - /* Any other character terminates the number. */ - break; - } - } - - /* Not a hex float. */ - else - { - for (;; ++s) - { - if (c_isdigit (*s)) { - got_digit = true; - - /* Make sure that multiplication by 10 will not overflow. */ - if (num > DBL_MAX * 0.1) - /* The value of the digit doesn't matter, since we have already - gotten as many digits as can be represented in a `double'. - This doesn't necessarily mean the result will overflow. - The exponent may reduce it to within range. - - We just need to record that there was another - digit so that we can multiply by 10 later. */ - ++exponent; - else - num = (num * 10.0) + (*s - '0'); - - /* Keep track of the number of digits after the decimal point. - If we just divided by 10 here, we would lose precision. */ - if (got_dot) - --exponent; + while (e-- != 0) + { + if (r < -DBL_MAX / radix) + { + errno = ERANGE; + return -HUGE_VAL; + } + else if (DBL_MAX / radix < r) + { + errno = ERANGE; + return HUGE_VAL; + } + else + r *= radix; + } } - else if (!got_dot && *s == '.') - /* Record that we have found the decimal point. */ - got_dot = true; - else - /* Any other character terminates the number. */ - break; } + + return r; } +} + +/* Parse a number at NPTR; this is a bit like strtol (NPTR, ENDPTR) + except there are no leading spaces or signs or "0x", and ENDPTR is + nonnull. The number uses a base BASE (either 10 or 16) fraction, a + radix RADIX (either 10 or 2) exponent, and exponent character + EXPCHAR. To convert from a number of digits to a radix exponent, + multiply by RADIX_MULTIPLIER (either 1 or 4). */ +static double +parse_number (const char *nptr, + int base, int radix, int radix_multiplier, char expchar, + char **endptr) +{ + const char *s = nptr; + bool got_dot = false; + long int exponent = 0; + double num = 0; - if (!got_digit) + for (;; ++s) { - /* Check for infinities and NaNs. */ - if (c_tolower (*s) == 'i' - && c_tolower (s[1]) == 'n' - && c_tolower (s[2]) == 'f') + int digit; + if (c_isdigit (*s)) + digit = *s - '0'; + else if (base == 16 && c_isxdigit (*s)) + digit = c_tolower (*s) - ('a' - 10); + else if (! got_dot && *s == '.') { - s += 3; - num = HUGE_VAL; - if (c_tolower (*s) == 'i' - && c_tolower (s[1]) == 'n' - && c_tolower (s[2]) == 'i' - && c_tolower (s[3]) == 't' - && c_tolower (s[4]) == 'y') - s += 5; - goto valid; + /* Record that we have found the decimal point. */ + got_dot = true; + continue; } -#ifdef NAN - else if (c_tolower (*s) == 'n' - && c_tolower (s[1]) == 'a' - && c_tolower (s[2]) == 'n') + else + /* Any other character terminates the number. */ + break; + + /* Make sure that multiplication by base will not overflow. */ + if (num <= DBL_MAX / base) + num = num * base + digit; + else { - s += 3; - num = NAN; - /* Since nan(<n-char-sequence>) is implementation-defined, - we define it by ignoring <n-char-sequence>. A nicer - implementation would populate the bits of the NaN - according to interpreting n-char-sequence as a - hexadecimal number, but the result is still a NaN. */ - if (*s == '(') - { - const unsigned char *p = s + 1; - while (c_isalnum (*p)) - p++; - if (*p == ')') - s = p + 1; - } - goto valid; + /* The value of the digit doesn't matter, since we have already + gotten as many digits as can be represented in a `double'. + This doesn't necessarily mean the result will overflow. + The exponent may reduce it to within range. + + We just need to record that there was another + digit so that we can multiply by 10 later. */ + exponent += radix_multiplier; } -#endif - goto noconv; + + /* Keep track of the number of digits after the decimal point. + If we just divided by base here, we might lose precision. */ + if (got_dot) + exponent -= radix_multiplier; } - if (c_tolower (*s) == (hex ? 'p' : 'e') && !isspace (s[1])) + if (c_tolower (*s) == expchar && ! locale_isspace (s[1])) { - /* Get the exponent specified after the `e' or `E'. */ + /* Add any given exponent to the implicit one. */ int save = errno; char *end; - long int value; + long int value = strtol (s + 1, &end, 10); + errno = save; - errno = 0; - ++s; - value = strtol ((char *) s, &end, 10); - if (errno == ERANGE && num) + if (s + 1 != end) { - /* The exponent overflowed a `long int'. It is probably a safe - assumption that an exponent that cannot be represented by - a `long int' exceeds the limits of a `double'. */ - if (endptr != NULL) - *endptr = end; - if (value < 0) - goto underflow; - else - goto overflow; + /* Skip past the exponent, and add in the implicit exponent, + resulting in an extreme value on overflow. */ + s = end; + exponent = + (exponent < 0 + ? (value < LONG_MIN - exponent ? LONG_MIN : exponent + value) + : (LONG_MAX - exponent < value ? LONG_MAX : exponent + value)); } - else if (end == (char *) s) - /* There was no exponent. Reset END to point to - the 'e' or 'E', so *ENDPTR will be set there. */ - end = (char *) s - 1; - errno = save; - s = (unsigned char *) end; - exponent += value; } - if (num == 0.0) - goto valid; + *endptr = (char *) s; + return scale_radix_exp (num, radix, exponent); +} - if (hex) - { - /* ldexp takes care of range errors. */ - num = ldexp (num, exponent); - goto valid; - } +static double underlying_strtod (const char *, char **); + +/* Convert NPTR to a double. If ENDPTR is not NULL, a pointer to the + character after the last one used in the number is put in *ENDPTR. */ +double +strtod (const char *nptr, char **endptr) +{ + bool negative = false; + + /* The number so far. */ + double num; + + const char *s = nptr; + char *end; + + /* Eat whitespace. */ + while (locale_isspace (*s)) + ++s; + + /* Get the sign. */ + negative = *s == '-'; + if (*s == '-' || *s == '+') + ++s; - /* Multiply NUM by 10 to the EXPONENT power, - checking for overflow and underflow. */ + num = underlying_strtod (s, &end); - if (exponent < 0) + if (c_isdigit (s[*s == '.'])) { - if (num < DBL_MIN * pow (10.0, (double) -exponent)) - goto underflow; + /* If a hex float was converted incorrectly, do it ourselves. */ + if (*s == '0' && c_tolower (s[1]) == 'x' && end <= s + 2 + && c_isxdigit (s[2 + (s[2] == '.')])) + num = parse_number (s + 2, 16, 2, 4, 'p', &end); + + s = end; } - else if (exponent > 0) + + /* Check for infinities and NaNs. */ + else if (c_tolower (*s) == 'i' + && c_tolower (s[1]) == 'n' + && c_tolower (s[2]) == 'f') { - if (num > DBL_MAX * pow (10.0, (double) -exponent)) - goto overflow; + s += 3; + if (c_tolower (*s) == 'i' + && c_tolower (s[1]) == 'n' + && c_tolower (s[2]) == 'i' + && c_tolower (s[3]) == 't' + && c_tolower (s[4]) == 'y') + s += 5; + num = HUGE_VAL; } + else if (c_tolower (*s) == 'n' + && c_tolower (s[1]) == 'a' + && c_tolower (s[2]) == 'n') + { + s += 3; + if (*s == '(') + { + const char *p = s + 1; + while (c_isalnum (*p)) + p++; + if (*p == ')') + s = p + 1; + } - num *= pow (10.0, (double) exponent); + /* If the underlying implementation misparsed the NaN, assume + its result is incorrect, and return a NaN. Normally it's + better to use the underlying implementation's result, since a + nice implementation populates the bits of the NaN according + to interpreting n-char-sequence as a hexadecimal number. */ + if (s != end) + num = NAN; + } + else + { + /* No conversion could be performed. */ + errno = EINVAL; + s = nptr; + } - valid: if (endptr != NULL) *endptr = (char *) s; return negative ? -num : num; +} - overflow: - /* Return an overflow error. */ - if (endptr != NULL) - *endptr = (char *) s; - errno = ERANGE; - return negative ? -HUGE_VAL : HUGE_VAL; - - underflow: - /* Return an underflow error. */ - if (endptr != NULL) - *endptr = (char *) s; - errno = ERANGE; - return negative ? -0.0 : 0.0; - - noconv: - /* There was no number. */ - if (endptr != NULL) - *endptr = (char *) nptr; - errno = EINVAL; - return 0.0; +/* The "underlying" strtod implementation. This must be defined + after strtod because it #undefs strtod. */ +static double +underlying_strtod (const char *nptr, char **endptr) +{ + if (HAVE_RAW_DECL_STRTOD) + { + /* Prefer the native strtod if available. Usually it should + work and it should give more-accurate results than our + approximation. */ + #undef strtod + return strtod (nptr, endptr); + } + else + { + /* Approximate strtod well enough for this module. There's no + need to handle anything but finite unsigned decimal + numbers with nonnull ENDPTR. */ + return parse_number (nptr, 10, 10, 1, 'e', endptr); + } } diff --git a/m4/strtod.m4 b/m4/strtod.m4 index aa6a1c53d5..05d36bd4b3 100644 --- a/m4/strtod.m4 +++ b/m4/strtod.m4 @@ -11,7 +11,7 @@ AC_DEFUN([gl_FUNC_STRTOD], dnl Don't call AC_FUNC_STRTOD, because it does not have the right guess dnl when cross-compiling. dnl Don't call AC_CHECK_FUNCS([strtod]) because it would collide with the - dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro, + dnl ac_cv_func_strtod variable set by the AC_FUNC_STRTOD macro. AC_CHECK_DECLS_ONCE([strtod]) if test $ac_cv_have_decl_strtod != yes; then HAVE_STRTOD=0 @@ -113,12 +113,26 @@ numeric_equal (double x, double y) fi fi if test $HAVE_STRTOD = 0 || test $REPLACE_STRTOD = 1; then + AC_LIBOBJ([strtod]) gl_PREREQ_STRTOD - dnl Use undocumented macro to set POW_LIB correctly. - _AC_LIBOBJ_STRTOD fi ]) # Prerequisites of lib/strtod.c. -# The need for pow() is already handled by AC_FUNC_STRTOD. -AC_DEFUN([gl_PREREQ_STRTOD], [:]) +# FIXME: This implementation is a copy of printf-frexp.m4 and should be shared. +AC_DEFUN([gl_PREREQ_STRTOD], [ + AC_CACHE_CHECK([whether ldexp can be used without linking with libm], + [gl_cv_func_ldexp_no_libm], + [ + AC_TRY_LINK([#include <math.h> + double x; + int y;], + [return ldexp (x, y) < 1;], + [gl_cv_func_ldexp_no_libm=yes], + [gl_cv_func_ldexp_no_libm=no]) + ]) + if test $gl_cv_func_ldexp_no_libm = yes; then + AC_DEFINE([HAVE_LDEXP_IN_LIBC], [1], + [Define if the ldexp function is available in libc.]) + fi +]) diff --git a/modules/strtod b/modules/strtod index 9a4a0f31d2..d4f64c13d4 100644 --- a/modules/strtod +++ b/modules/strtod @@ -15,14 +15,10 @@ gl_FUNC_STRTOD gl_STDLIB_MODULE_INDICATOR([strtod]) Makefile.am: -LIBS += $(POW_LIB) Include: <stdlib.h> -Link: -$(POW_LIB) - License: LGPL diff --git a/modules/strtod-tests b/modules/strtod-tests index 8d5be7f7f7..bea7825e77 100644 --- a/modules/strtod-tests +++ b/modules/strtod-tests @@ -11,6 +11,5 @@ signbit configure.ac: Makefile.am: -LIBS += $(POW_LIB) TESTS += test-strtod check_PROGRAMS += test-strtod |