diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2014-08-18 12:41:41 -0400 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2014-08-20 09:33:11 -0400 |
commit | ff4eb3984da9fdf3cec4f01cf752e4e7da44139f (patch) | |
tree | 9474354cb882578cd450a6b74a9f4d4eff6ee0bd | |
parent | 8b7fad815cf65ab870e666844e22045c74803f64 (diff) | |
download | perl-ff4eb3984da9fdf3cec4f01cf752e4e7da44139f.tar.gz |
Separate grok_infnan() from grok_number().
Remaining issues:
(1) would need tests, but there are two problems: [a] generating inf/nan
reliably and testing for it from Perl level is hard (see items (2) and
(3) below), and [b] the behavior of various systems with especially NaN
differs (some platforms might throw SIGFPEs).
(2) toke.c:scan_number() will not call this code (via grok_number)
because "NaN" or "Inf" do not look at all like floats to it.
(3) Even as we now recognize these forms, the native strtod()
might not (problem of cross-portability of these exceptional
forms: Win32 outputs e.g. "1.#INF", what Linux reading this should do,
or conversely Linux outputs "Inf", what should Win32 do?)
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | numeric.c | 133 | ||||
-rw-r--r-- | proto.h | 6 |
4 files changed, 120 insertions, 21 deletions
@@ -803,6 +803,7 @@ EMsPR |char*|form_short_octal_warning|NN const char * const s \ |const STRLEN len #endif Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result +Apdn |int |grok_infnan |NN const char** sp|NN const char *send Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send @@ -173,6 +173,7 @@ #define grok_atou Perl_grok_atou #define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d) #define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d) +#define grok_infnan Perl_grok_infnan #define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c) #define grok_number_flags(a,b,c,d) Perl_grok_number_flags(aTHX_ a,b,c,d) #define grok_numeric_radix(a,b) Perl_grok_numeric_radix(aTHX_ a,b) @@ -586,6 +586,103 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) return grok_number_flags(pv, len, valuep, 0); } +/* +=for apidoc grok_infnan + +Helper for grok_number(), accepts various ways of spelling "infinity" +or "not a number", and returns one of the following flag combinations: + + IS_NUMBER_INFINITE + IS_NUMBER_NAN + IS_NUMBER_INFINITE | IS_NUMBER_NEG + IS_NUMBER_NAN | IS_NUMBER_NEG + 0 + +If an infinity or not-a-number is recognized, the *sp will point to +one past the end of the recognized string. If the recognition fails, +zero is returned, and the *sp will not move. + +=cut +*/ + +int +Perl_grok_infnan(const char** sp, const char* send) +{ + const char* s = *sp; + int flags = 0; + + PERL_ARGS_ASSERT_GROK_INFNAN; + + if (*s == '-') { + flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */ + s++; if (s == send) return 0; + } + + if (*s == '1') { + /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1#.IND (maybe also 1.#NAN) */ + s++; if (s == send) return 0; + if (*s == '.') { + s++; if (s == send) return 0; + } + if (*s == '#') { + s++; if (s == send) return 0; + } else + return 0; + } + + if (*s == 'I' || *s == 'i') { + /* INF or IND (1.#IND is indeterminate, a certain type of NAN) */ + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send) return 0; + if (*s == 'F' || *s == 'f') { + s++; + if (s < send && (*s == 'I' || *s == 'i')) { + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; + s++; if (s == send || (*s != 'T' && *s != 't')) return 0; + /* XXX maybe also grok "infinite"? */ + s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; + s++; + } else if (*s) + return 0; + flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + } + else if (*s == 'D' || *s == 'd') { + s++; + flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; + } else + return 0; + + *sp = s; + return flags; + } + else { + /* NAN */ + if (*s == 'S' || *s == 's' || *s == 'Q' || *s == 'q') { + /* snan, qNaN */ + /* XXX do something with the snan/qnan difference */ + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + } + + if (*s == 'N' || *s == 'n') { + s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; + s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; + s++; + + /* NaN can be followed by various stuff since there are + * multiple different NaN values, and some implementations + * output the "payload" values, e.g. NaN123. */ + + flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; + } + + *sp = s; + return flags; + } + + return 0; +} + static const UV uv_max_div_10 = UV_MAX / 10; static const U8 uv_max_mod_10 = UV_MAX % 10; @@ -724,31 +821,25 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) } else return 0; - } else if (*s == 'I' || *s == 'i') { - s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; - s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; - s++; if (s < send && (*s == 'I' || *s == 'i')) { - s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; - s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; - s++; if (s == send || (*s != 'T' && *s != 't')) return 0; - s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; - s++; - } - sawinf = 1; - } else if (*s == 'N' || *s == 'n') { - /* XXX TODO: There are signaling NaNs and quiet NaNs. */ - s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; - s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; - s++; - sawnan = 1; - } else - return 0; + } + else { + int infnan_flags = Perl_grok_infnan(&s, send); + if ((infnan_flags & IS_NUMBER_INFINITY)) { + numtype |= infnan_flags; + sawinf = 1; + } + else if ((infnan_flags & IS_NUMBER_NAN)) { + numtype |= infnan_flags; + sawnan = 1; + } else + return 0; + } if (sawinf) { - numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + /* Keep the sign for infinity. */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { - numtype &= IS_NUMBER_NEG; /* Keep track of sign */ + numtype &= IS_NUMBER_NEG; /* Clear sign for nan. */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ @@ -1308,6 +1308,12 @@ PERL_CALLCONV UV Perl_grok_hex(pTHX_ const char* start, STRLEN* len_p, I32* flag #define PERL_ARGS_ASSERT_GROK_HEX \ assert(start); assert(len_p); assert(flags) +PERL_CALLCONV int Perl_grok_infnan(const char** sp, const char *send) + __attribute__nonnull__(1) + __attribute__nonnull__(2); +#define PERL_ARGS_ASSERT_GROK_INFNAN \ + assert(sp); assert(send) + PERL_CALLCONV int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_GROK_NUMBER \ |