diff options
author | Anne Archibald <archibald@astron.nl> | 2015-08-12 17:46:14 +0200 |
---|---|---|
committer | Anne Archibald <archibald@astron.nl> | 2015-08-28 15:52:42 +0200 |
commit | 6cbd724f75f25cdaa7cf68fd9743064b77fbf787 (patch) | |
tree | 816e86d5806f60c9e2bd618fe3874a6bec373e10 /numpy/core | |
parent | b478ded953395fee6182439ff5e8eb38fd4271ce (diff) | |
download | numpy-6cbd724f75f25cdaa7cf68fd9743064b77fbf787.tar.gz |
BUG: fix #4381: precision loss on string -> longdouble conversion
Avoid going through python floats when converting string to
longdouble. This makes it dramatically easier to produce
full-precision long double numbers. Fixed are the constructor
(np.longdouble("1.01")), np.fromfile, np.fromstring, np.loadtxt,
and np.genfromtxt (and functions based on it). Also fixed is
precision loss when using np.tofile.
This also fixes #1481, poor handling of bad data in fromfile
and fromstring.
If the function strtod_l is not available, almost none of this
will work, and many tests will fail.
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/setup_common.py | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 86 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 23 | ||||
-rw-r--r-- | numpy/core/src/multiarray/numpyos.c | 159 | ||||
-rw-r--r-- | numpy/core/src/multiarray/numpyos.h | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_longdouble.py | 219 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 4 |
8 files changed, 479 insertions, 22 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 253dad5b6..68efd1791 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -104,7 +104,7 @@ MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh", "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow", "copysign", "nextafter", "ftello", "fseeko", - "strtoll", "strtoull", "cbrt"] + "strtoll", "strtoull", "cbrt", "strtold_l",] OPTIONAL_HEADERS = [ diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 5a1e2f4d5..d120163a0 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -33,7 +33,6 @@ #include "npy_cblas.h" #include <limits.h> - /* ***************************************************************************** ** PYTHON TYPES TO C TYPES ** @@ -309,6 +308,54 @@ static int /**end repeat**/ +static NPY_INLINE npy_longdouble +string_to_long_double(PyObject*op) +{ + char *s; + char *end; + npy_longdouble temp; + PyObject* b; + + if (PyUnicode_Check(op)) { + b = PyUnicode_AsUTF8String(op); + if (!b) { + return 0; + } + } + else { + b = op; + Py_XINCREF(b); + } + s = PyBytes_AsString(b); + if (s) { + errno = 0; + temp = NumPyOS_ascii_strtold(s, &end); + if (end==s || *end) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long double: %s", + s); + Py_XDECREF(b); + return 0; + } + else if (errno) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long double: %s (%s)", + s, + strerror(errno)); + Py_XDECREF(b); + return 0; + } + Py_XDECREF(b); + } + else { + /* Probably wasn't a string, try converting it via a python double */ + PyErr_Clear(); + Py_XDECREF(b); + temp = (npy_longdouble) MyPyFloat_AsDouble(op); + } + return temp; +} + /* * These return array scalars which are different than other date-types. */ @@ -330,7 +377,11 @@ LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap) temp = ((PyLongDoubleScalarObject *)op)->obval; } else { - temp = (npy_longdouble) MyPyFloat_AsDouble(op); + /* In case something funny happened in PyArray_IsScalar */ + if (PyErr_Occurred()) { + return -1; + } + temp = string_to_long_double(op); } if (PyErr_Occurred()) { return -1; @@ -1572,8 +1623,8 @@ static int /**end repeat**/ /**begin repeat - * #fname = FLOAT, DOUBLE, LONGDOUBLE# - * #type = npy_float, npy_double, npy_longdouble# + * #fname = FLOAT, DOUBLE# + * #type = npy_float, npy_double# */ static int @fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore), @@ -1589,6 +1640,18 @@ static int /**end repeat**/ static int +LONGDOUBLE_scan(FILE *fp, npy_longdouble *ip, void *NPY_UNUSED(ignore), + PyArray_Descr *NPY_UNUSED(ignored)) +{ + long double result; + int ret; + + ret = NumPyOS_ascii_ftoLf(fp, &result); + *ip = (npy_longdouble) result; + return ret; +} + +static int HALF_scan(FILE *fp, npy_half *ip, void *NPY_UNUSED(ignore), PyArray_Descr *NPY_UNUSED(ignored)) { @@ -1675,8 +1738,8 @@ static int /**begin repeat * - * #fname = FLOAT, DOUBLE, LONGDOUBLE# - * #type = npy_float, npy_double, npy_longdouble# + * #fname = FLOAT, DOUBLE# + * #type = npy_float, npy_double# */ static int @fname@_fromstr(char *str, void *ip, char **endptr, @@ -1691,6 +1754,17 @@ static int /**end repeat**/ static int +LONGDOUBLE_fromstr(char *str, void *ip, char **endptr, + PyArray_Descr *NPY_UNUSED(ignore)) +{ + long double result; + + result = NumPyOS_ascii_strtold(str, endptr); + *(npy_longdouble *)ip = result; + return 0; +} + +static int HALF_fromstr(char *str, void *ip, char **endptr, PyArray_Descr *NPY_UNUSED(ignore)) { diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c index 35958745c..7cb27581a 100644 --- a/numpy/core/src/multiarray/convert.c +++ b/numpy/core/src/multiarray/convert.c @@ -171,7 +171,7 @@ PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format) /* * standard writing */ - strobj = PyObject_Str(obj); + strobj = PyObject_Repr(obj); Py_DECREF(obj); if (strobj == NULL) { Py_DECREF(it); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 1eca908f5..e23cbe3c9 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -45,7 +45,18 @@ static int fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype, const char *end) { - int r = dtype->f->fromstr(*s, dptr, s, dtype); + char *e = *s; + int r = dtype->f->fromstr(*s, dptr, &e, dtype); + /* + * fromstr always returns 0 for basic dtypes + * s points to the end of the parsed string + * if an error occurs s is not changed + */ + if (*s == e) { + /* Nothing read */ + return -1; + } + *s = e; if (end != NULL && *s > end) { return -1; } @@ -57,7 +68,14 @@ fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype, void *NPY_UNUSED(stream_data)) { /* the NULL argument is for backwards-compatibility */ - return dtype->f->scanfunc(*fp, dptr, NULL, dtype); + int r = dtype->f->scanfunc(*fp, dptr, NULL, dtype); + /* r can be EOF or the number of items read (0 or 1) */ + if (r == 1) { + return 0; + } + else { + return -1; + } } /* @@ -3279,6 +3297,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread, dptr = PyArray_DATA(r); for (i= 0; num < 0 || i < num; i++) { if (next(&stream, dptr, dtype, stream_data) < 0) { + /* EOF */ break; } *nread += 1; diff --git a/numpy/core/src/multiarray/numpyos.c b/numpy/core/src/multiarray/numpyos.c index dddead7ea..0b59d4600 100644 --- a/numpy/core/src/multiarray/numpyos.c +++ b/numpy/core/src/multiarray/numpyos.c @@ -13,6 +13,13 @@ #include "npy_pycompat.h" +#ifdef HAVE_STRTOLD_L +#include <stdlib.h> +#include <xlocale.h> +#endif + + + /* * From the C99 standard, section 7.19.6: The exponent always contains at least * two digits, and only as many more digits as necessary to represent the @@ -451,6 +458,17 @@ NumPyOS_ascii_strtod(const char *s, char** endptr) struct lconv *locale_data = localeconv(); const char *decimal_point = locale_data->decimal_point; size_t decimal_point_len = strlen(decimal_point); + /* + * It is unclear why this function needs locale information. + * The goal is to do locale-independent parsing. This function + * contains code to stop the parsing early if an unusual decimal + * point is encountered and then pass the string so far to + * NumPyOS_ascii_strtod_plain, which will also stop if an unusual + * decimal point is encountered. The only case I can think of + * where this would matter is if the decimal separator were + * something that could occur in the middle of a valid number, + * in which case this function does the wrong thing. + */ char buffer[FLOAT_FORMATBUFLEN+1]; const char *p; @@ -539,13 +557,88 @@ NumPyOS_ascii_strtod(const char *s, char** endptr) return NumPyOS_ascii_strtod_plain(s, endptr); } +NPY_NO_EXPORT long double +NumPyOS_ascii_strtold(const char *s, char** endptr) +{ + const char *p; + long double result; +#ifdef HAVE_STRTOLD_L + locale_t clocale; +#endif + + while (NumPyOS_ascii_isspace(*s)) { + ++s; + } + + /* + * ##1 + * + * Recognize POSIX inf/nan representations on all platforms. + */ + p = s; + result = 1.0; + if (*p == '-') { + result = -1.0; + ++p; + } + else if (*p == '+') { + ++p; + } + if (NumPyOS_ascii_strncasecmp(p, "nan", 3) == 0) { + p += 3; + if (*p == '(') { + ++p; + while (NumPyOS_ascii_isalnum(*p) || *p == '_') { + ++p; + } + if (*p == ')') { + ++p; + } + } + if (endptr != NULL) { + *endptr = (char*)p; + } + return NPY_NAN; + } + else if (NumPyOS_ascii_strncasecmp(p, "inf", 3) == 0) { + p += 3; + if (NumPyOS_ascii_strncasecmp(p, "inity", 5) == 0) { + p += 5; + } + if (endptr != NULL) { + *endptr = (char*)p; + } + return result*NPY_INFINITY; + } + /* End of ##1 */ + +#ifdef HAVE_STRTOLD_L + clocale = newlocale(LC_ALL_MASK, "C", NULL); + if (clocale) { + errno = 0; + result = strtold_l(s, endptr, clocale); + freelocale(clocale); + if (errno) { + *endptr = (char*)s; + } + } + else { + *endptr = (char*)s; + result = 0; + } + return result; +#else + return NumPyOS_ascii_strtod_plain(s, endptr); +#endif +} /* - * NumPyOS_ascii_ftolf: + * read_numberlike_string: * * fp: FILE pointer * * value: Place to store the value read * - * Similar to PyOS_ascii_strtod, except that it reads input from a file. + * Read what looks like valid numeric input and store it in a buffer + * for later parsing as a number. * * Similarly to fscanf, this function always consumes leading whitespace, * and any text that could be the leading part in valid input. @@ -555,17 +648,17 @@ NumPyOS_ascii_strtod(const char *s, char** endptr) * * 1 if a number read, * * EOF if end-of-file met before reading anything. */ -NPY_NO_EXPORT int -NumPyOS_ascii_ftolf(FILE *fp, double *value) +static int +read_numberlike_string(FILE *fp, char *buffer, size_t buflen) { - char buffer[FLOAT_FORMATBUFLEN + 1]; + char *endp; char *p; int c; int ok; /* - * Pass on to PyOS_ascii_strtod the leftmost matching part in regexp + * Fill buffer with the leftmost matching part in regexp * * \s*[+-]? ( [0-9]*\.[0-9]+([eE][+-]?[0-9]+) * | nan ( \([:alphanum:_]*\) )? @@ -583,7 +676,7 @@ NumPyOS_ascii_ftolf(FILE *fp, double *value) #define NEXT_CHAR() \ do { \ - if (c == EOF || endp >= buffer + FLOAT_FORMATBUFLEN) \ + if (c == EOF || endp >= buffer + buflen - 1) \ END_MATCH(); \ *endp++ = (char)c; \ c = getc(fp); \ @@ -668,11 +761,8 @@ buffer_filled: ungetc(c, fp); *endp = '\0'; - /* 5. try to convert buffer. */ - *value = NumPyOS_ascii_strtod(buffer, &p); - /* return 1 if something read, else 0 */ - return (buffer == p) ? 0 : 1; + return (buffer == endp) ? 0 : 1; } #undef END_MATCH @@ -681,3 +771,50 @@ buffer_filled: #undef MATCH_ONE_OR_NONE #undef MATCH_ONE_OR_MORE #undef MATCH_ZERO_OR_MORE + +/* + * NumPyOS_ascii_ftolf: + * * fp: FILE pointer + * * value: Place to store the value read + * + * Similar to PyOS_ascii_strtod, except that it reads input from a file. + * + * Similarly to fscanf, this function always consumes leading whitespace, + * and any text that could be the leading part in valid input. + * + * Return value: similar to fscanf. + * * 0 if no number read, + * * 1 if a number read, + * * EOF if end-of-file met before reading anything. + */ +NPY_NO_EXPORT int +NumPyOS_ascii_ftolf(FILE *fp, double *value) +{ + char buffer[FLOAT_FORMATBUFLEN + 1]; + char *p; + int r; + + r = read_numberlike_string(fp, buffer, FLOAT_FORMATBUFLEN+1); + + if (r != EOF && r != 0) { + *value = NumPyOS_ascii_strtod(buffer, &p); + r = (p == buffer) ? 0 : 1; + } + return r; +} + +NPY_NO_EXPORT int +NumPyOS_ascii_ftoLf(FILE *fp, long double *value) +{ + char buffer[FLOAT_FORMATBUFLEN + 1]; + char *p; + int r; + + r = read_numberlike_string(fp, buffer, FLOAT_FORMATBUFLEN+1); + + if (r != EOF && r != 0) { + *value = NumPyOS_ascii_strtold(buffer, &p); + r = (p == buffer) ? 0 : 1; + } + return r; +} diff --git a/numpy/core/src/multiarray/numpyos.h b/numpy/core/src/multiarray/numpyos.h index 6f247e608..3bf77391e 100644 --- a/numpy/core/src/multiarray/numpyos.h +++ b/numpy/core/src/multiarray/numpyos.h @@ -19,10 +19,16 @@ NumPyOS_ascii_formatl(char *buffer, size_t buf_size, NPY_NO_EXPORT double NumPyOS_ascii_strtod(const char *s, char** endptr); +NPY_NO_EXPORT long double +NumPyOS_ascii_strtold(const char *s, char** endptr); + NPY_NO_EXPORT int NumPyOS_ascii_ftolf(FILE *fp, double *value); NPY_NO_EXPORT int +NumPyOS_ascii_ftoLf(FILE *fp, long double *value); + +NPY_NO_EXPORT int NumPyOS_ascii_isspace(char c); #endif diff --git a/numpy/core/tests/test_longdouble.py b/numpy/core/tests/test_longdouble.py new file mode 100644 index 000000000..9cc0f3948 --- /dev/null +++ b/numpy/core/tests/test_longdouble.py @@ -0,0 +1,219 @@ +from __future__ import division, absolute_import, print_function + +import locale +from tempfile import NamedTemporaryFile +import unittest + +import numpy as np +from numpy.testing import ( + run_module_suite, assert_, assert_equal, dec, assert_raises, + assert_array_equal +) +from numpy.compat import sixu +from test_print import in_foreign_locale + +longdouble_longer_than_double = (np.finfo(np.longdouble).eps + < np.finfo(np.double).eps) + + +def test_scalar_extraction(): + """Confirm that extracting a value doesn't convert to python float""" + o = 1 + np.finfo(np.longdouble).eps + a = np.array([o, o, o]) + assert_equal(a[1], o) + + +# Conversions string -> long double + + +def test_repr_roundtrip(): + o = 1 + np.finfo(np.longdouble).eps + assert_equal(np.longdouble(repr(o)), o, + "repr was %s" % repr(o)) + + +def test_unicode(): + np.longdouble(sixu("1.2")) + + +def test_string(): + np.longdouble("1.2") + + +def test_bytes(): + np.longdouble(b"1.2") + + +@in_foreign_locale +def test_fromstring_foreign(): + f = 1.234 + a = np.fromstring(repr(f), dtype=float, sep=" ") + assert_equal(a[0], f) + + +def test_repr_roundtrip_bytes(): + o = 1 + np.finfo(np.longdouble).eps + assert_equal(np.longdouble(repr(o).encode("ascii")), o) + + +@in_foreign_locale +def test_repr_roundtrip_foreign(): + o = 1.5 + assert_equal(o, np.longdouble(repr(o))) + + +def test_bogus_string(): + assert_raises(ValueError, np.longdouble, "spam") + assert_raises(ValueError, np.longdouble, "1.0 flub") + + +def test_underflow_zero(): + z = np.finfo + + +def test_fromstring(): + o = 1 + np.finfo(np.longdouble).eps + s = (" " + repr(o))*5 + a = np.array([o]*5) + assert_equal(np.fromstring(s, sep=" ", dtype=np.longdouble), a, + err_msg="reading '%s'" % s) + + +def test_loadtxt(): + o = 1 + np.finfo(np.longdouble).eps + f = NamedTemporaryFile(mode="wt") + for i in range(5): + f.write(repr(o) + "\n") + f.flush() + a = np.array([o]*5) + assert_equal(np.loadtxt(f.name, dtype=np.longdouble), a) + f.close() + + +def test_genfromtxt(): + o = 1 + np.finfo(np.longdouble).eps + f = NamedTemporaryFile(mode="wt") + for i in range(5): + f.write(repr(o) + "\n") + f.flush() + a = np.array([o]*5) + assert_equal(np.genfromtxt(f.name, dtype=np.longdouble), a) + f.close() + + +def test_fromfile(): + o = 1 + np.finfo(np.longdouble).eps + f = NamedTemporaryFile(mode="wt") + for i in range(5): + f.write(repr(o) + "\n") + f.flush() + a = np.array([o]*5) + F = open(f.name, "rt") + b = np.fromfile(F, + dtype=np.longdouble, + sep="\n") + F.close() + F = open(f.name, "rt") + s = F.read() + F.close() + f.close() + assert_equal(b, a, err_msg="decoded %s as %s" % (repr(s), repr(b))) + + +def test_fromstring_bogus(): + assert_equal(np.fromstring("1. 2. 3. flop 4.", dtype=float, sep=" "), + np.array([1., 2., 3.])) + + +def test_fromstring_empty(): + assert_equal(np.fromstring("xxxxx", sep="x"), + np.array([])) + + +def test_fromstring_missing(): + assert_equal(np.fromstring("1xx3x4x5x6", sep="x"), + np.array([1])) + + +class FileBased(unittest.TestCase): + def setUp(self): + self.o = 1 + np.finfo(np.longdouble).eps + self.f = NamedTemporaryFile(mode="wt") + + def tearDown(self): + self.f.close() + del self.f + + def test_fromfile_bogus(self): + self.f.write("1. 2. 3. flop 4.\n") + self.f.flush() + F = open(self.f.name, "rt") + try: + assert_equal(np.fromfile(F, dtype=float, sep=" "), + np.array([1., 2., 3.])) + finally: + F.close() + + def test_tofile_roundtrip(self): + a = np.array([self.o]*3) + a.tofile(self.f.name, sep=" ") + F = open(self.f.name, "rt") + try: + assert_equal(np.fromfile(F, dtype=np.longdouble, sep=" "), + a) + finally: + F.close() + + +@in_foreign_locale +def test_fromstring_foreign(): + s = "1.234" + a = np.fromstring(s, dtype=np.longdouble, sep=" ") + assert_equal(a[0], np.longdouble(s)) + + +@in_foreign_locale +def test_fromstring_foreign_sep(): + a = np.array([1, 2, 3, 4]) + b = np.fromstring("1,2,3,4,", dtype=np.longdouble, sep=",") + assert_array_equal(a, b) + + +@in_foreign_locale +def test_fromstring_foreign_value(): + b = np.fromstring("1,234", dtype=np.longdouble, sep=" ") + assert_array_equal(b[0], 1) + + +# Conversions long double -> string + + +def test_repr_exact(): + o = 1 + np.finfo(np.longdouble).eps + assert_(repr(o) != '1') + + +@dec.knownfailureif(longdouble_longer_than_double, "BUG #2376") +def test_format(): + o = 1 + np.finfo(np.longdouble).eps + assert_("{0:.40g}".format(o) != '1') + + +@dec.knownfailureif(longdouble_longer_than_double, "BUG #2376") +def test_percent(): + o = 1 + np.finfo(np.longdouble).eps + assert_("%.40g" % o != '1') + + +@dec.knownfailureif(longdouble_longer_than_double, "array repr problem") +def test_array_repr(): + o = 1 + np.finfo(np.longdouble).eps + a = np.array([o]) + b = np.array([1], dtype=np.longdouble) + if not np.all(a != b): + raise ValueError("precision loss creating arrays") + assert_(repr(a) != repr(b)) + + +if __name__ == "__main__": + run_module_suite() diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4ef419e96..3828b65d4 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -3411,7 +3411,9 @@ class TestIO(object): f = open(self.filename, 'r') s = f.read() f.close() - assert_equal(s, '1.51,2.0,3.51,4.0') + #assert_equal(s, '1.51,2.0,3.51,4.0') + y = np.array([float(p) for p in s.split(',')]) + assert_array_equal(x,y) def test_tofile_format(self): x = np.array([1.51, 2, 3.51, 4], dtype=float) |