summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2014-08-18 12:41:41 -0400
committerJarkko Hietaniemi <jhi@iki.fi>2014-08-20 09:33:11 -0400
commitff4eb3984da9fdf3cec4f01cf752e4e7da44139f (patch)
tree9474354cb882578cd450a6b74a9f4d4eff6ee0bd
parent8b7fad815cf65ab870e666844e22045c74803f64 (diff)
downloadperl-ff4eb3984da9fdf3cec4f01cf752e4e7da44139f.tar.gz
Separate grok_infnan() from grok_number().
Remaining issues: (1) would need tests, but there are two problems: [a] generating inf/nan reliably and testing for it from Perl level is hard (see items (2) and (3) below), and [b] the behavior of various systems with especially NaN differs (some platforms might throw SIGFPEs). (2) toke.c:scan_number() will not call this code (via grok_number) because "NaN" or "Inf" do not look at all like floats to it. (3) Even as we now recognize these forms, the native strtod() might not (problem of cross-portability of these exceptional forms: Win32 outputs e.g. "1.#INF", what Linux reading this should do, or conversely Linux outputs "Inf", what should Win32 do?)
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--numeric.c133
-rw-r--r--proto.h6
4 files changed, 120 insertions, 21 deletions
diff --git a/embed.fnc b/embed.fnc
index 0bde316744..97d0d99a2f 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -803,6 +803,7 @@ EMsPR |char*|form_short_octal_warning|NN const char * const s \
|const STRLEN len
#endif
Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
+Apdn |int |grok_infnan |NN const char** sp|NN const char *send
Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
Apd |int |grok_number_flags|NN const char *pv|STRLEN len|NULLOK UV *valuep|U32 flags
ApdR |bool |grok_numeric_radix|NN const char **sp|NN const char *send
diff --git a/embed.h b/embed.h
index 7b8d471df3..be519f205f 100644
--- a/embed.h
+++ b/embed.h
@@ -173,6 +173,7 @@
#define grok_atou Perl_grok_atou
#define grok_bin(a,b,c,d) Perl_grok_bin(aTHX_ a,b,c,d)
#define grok_hex(a,b,c,d) Perl_grok_hex(aTHX_ a,b,c,d)
+#define grok_infnan Perl_grok_infnan
#define grok_number(a,b,c) Perl_grok_number(aTHX_ a,b,c)
#define grok_number_flags(a,b,c,d) Perl_grok_number_flags(aTHX_ a,b,c,d)
#define grok_numeric_radix(a,b) Perl_grok_numeric_radix(aTHX_ a,b)
diff --git a/numeric.c b/numeric.c
index fd9d03b8fc..f179503574 100644
--- a/numeric.c
+++ b/numeric.c
@@ -586,6 +586,103 @@ Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
return grok_number_flags(pv, len, valuep, 0);
}
+/*
+=for apidoc grok_infnan
+
+Helper for grok_number(), accepts various ways of spelling "infinity"
+or "not a number", and returns one of the following flag combinations:
+
+ IS_NUMBER_INFINITE
+ IS_NUMBER_NAN
+ IS_NUMBER_INFINITE | IS_NUMBER_NEG
+ IS_NUMBER_NAN | IS_NUMBER_NEG
+ 0
+
+If an infinity or not-a-number is recognized, the *sp will point to
+one past the end of the recognized string. If the recognition fails,
+zero is returned, and the *sp will not move.
+
+=cut
+*/
+
+int
+Perl_grok_infnan(const char** sp, const char* send)
+{
+ const char* s = *sp;
+ int flags = 0;
+
+ PERL_ARGS_ASSERT_GROK_INFNAN;
+
+ if (*s == '-') {
+ flags |= IS_NUMBER_NEG; /* Yes, -NaN happens. Incorrect but happens. */
+ s++; if (s == send) return 0;
+ }
+
+ if (*s == '1') {
+ /* Visual C: 1.#SNAN, -1.#QNAN, 1#INF, 1#.IND (maybe also 1.#NAN) */
+ s++; if (s == send) return 0;
+ if (*s == '.') {
+ s++; if (s == send) return 0;
+ }
+ if (*s == '#') {
+ s++; if (s == send) return 0;
+ } else
+ return 0;
+ }
+
+ if (*s == 'I' || *s == 'i') {
+ /* INF or IND (1.#IND is indeterminate, a certain type of NAN) */
+ s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
+ s++; if (s == send) return 0;
+ if (*s == 'F' || *s == 'f') {
+ s++;
+ if (s < send && (*s == 'I' || *s == 'i')) {
+ s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
+ s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
+ s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
+ /* XXX maybe also grok "infinite"? */
+ s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
+ s++;
+ } else if (*s)
+ return 0;
+ flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
+ }
+ else if (*s == 'D' || *s == 'd') {
+ s++;
+ flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+ } else
+ return 0;
+
+ *sp = s;
+ return flags;
+ }
+ else {
+ /* NAN */
+ if (*s == 'S' || *s == 's' || *s == 'Q' || *s == 'q') {
+ /* snan, qNaN */
+ /* XXX do something with the snan/qnan difference */
+ s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
+ }
+
+ if (*s == 'N' || *s == 'n') {
+ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
+ s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
+ s++;
+
+ /* NaN can be followed by various stuff since there are
+ * multiple different NaN values, and some implementations
+ * output the "payload" values, e.g. NaN123. */
+
+ flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+ }
+
+ *sp = s;
+ return flags;
+ }
+
+ return 0;
+}
+
static const UV uv_max_div_10 = UV_MAX / 10;
static const U8 uv_max_mod_10 = UV_MAX % 10;
@@ -724,31 +821,25 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
}
else
return 0;
- } else if (*s == 'I' || *s == 'i') {
- s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
- s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
- s++; if (s < send && (*s == 'I' || *s == 'i')) {
- s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
- s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
- s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
- s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
- s++;
- }
- sawinf = 1;
- } else if (*s == 'N' || *s == 'n') {
- /* XXX TODO: There are signaling NaNs and quiet NaNs. */
- s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
- s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
- s++;
- sawnan = 1;
- } else
- return 0;
+ }
+ else {
+ int infnan_flags = Perl_grok_infnan(&s, send);
+ if ((infnan_flags & IS_NUMBER_INFINITY)) {
+ numtype |= infnan_flags;
+ sawinf = 1;
+ }
+ else if ((infnan_flags & IS_NUMBER_NAN)) {
+ numtype |= infnan_flags;
+ sawnan = 1;
+ } else
+ return 0;
+ }
if (sawinf) {
- numtype &= IS_NUMBER_NEG; /* Keep track of sign */
+ /* Keep the sign for infinity. */
numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
} else if (sawnan) {
- numtype &= IS_NUMBER_NEG; /* Keep track of sign */
+ numtype &= IS_NUMBER_NEG; /* Clear sign for nan. */
numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
} else if (s < send) {
/* we can have an optional exponent part */
diff --git a/proto.h b/proto.h
index 19ec194abf..df4b9e24f8 100644
--- a/proto.h
+++ b/proto.h
@@ -1308,6 +1308,12 @@ PERL_CALLCONV UV Perl_grok_hex(pTHX_ const char* start, STRLEN* len_p, I32* flag
#define PERL_ARGS_ASSERT_GROK_HEX \
assert(start); assert(len_p); assert(flags)
+PERL_CALLCONV int Perl_grok_infnan(const char** sp, const char *send)
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(2);
+#define PERL_ARGS_ASSERT_GROK_INFNAN \
+ assert(sp); assert(send)
+
PERL_CALLCONV int Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_GROK_NUMBER \