diff options
author | Hugo van der Sanden <hv@crypt.org> | 2021-08-15 15:34:12 +0100 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-03-04 10:31:41 -0700 |
commit | 7294e9f9f09f98d7bff4bfdae266a8f8121f907c (patch) | |
tree | efe6f789a51e63bb20ec5529a4567fb167d1bcd2 /numeric.c | |
parent | 98656496a12e0020bb1e4b971adc847ab7698c9d (diff) | |
download | perl-7294e9f9f09f98d7bff4bfdae266a8f8121f907c.tar.gz |
gh19010: fix returns for Perl_grok_infnan
Consistently honour what the docs have always promised:
If an infinity or a not-a-number is recognized, C<*sp> will point to
one byte past the end of the recognized string. If the recognition fails,
zero is returned, and C<*sp> will not move.
Additionally, restore Perl_grok_number_flags to allowing inf/nan with
trailing garbage only when called with PERL_SCAN_TRAILING; add notes
to the other two core callers to clarify that they always accept such
trailing garbage.
A small number of XS-APItest tests were modified to reflect the stricter
behaviour: "Infin" and "nanx" are now invalid without PERL_SCAN_TRAILING.
Diffstat (limited to 'numeric.c')
-rw-r--r-- | numeric.c | 159 |
1 files changed, 81 insertions, 78 deletions
@@ -771,15 +771,15 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; s++; if (s == send) return 0; if (isALPHA_FOLD_EQ(*s, 'F')) { - s++; + flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + *sp = ++s; if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) { - int fail = - flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING; - s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail; - s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail; - s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail; - s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail; - s++; + int trail = flags | IS_NUMBER_TRAILING; + s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return trail; + s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return trail; + s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return trail; + s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return trail; + *sp = ++s; } else if (odh) { while (s < send && *s == '0') { /* 1.#INF00 */ s++; @@ -787,10 +787,8 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) } while (s < send && isSPACE(*s)) s++; - if (s < send && *s) { - flags |= IS_NUMBER_TRAILING; - } - flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; + *sp = s; + return flags | (s < send ? IS_NUMBER_TRAILING : 0); } else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */ s++; @@ -798,9 +796,8 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) while (s < send && *s == '0') { /* 1.#IND00 */ s++; } - if (s < send && *s) { - flags |= IS_NUMBER_TRAILING; - } + *sp = s; + return flags | (s < send ? IS_NUMBER_TRAILING : 0); } else return 0; } @@ -816,9 +813,9 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) if (isALPHA_FOLD_EQ(*s, 'N')) { s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0; s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0; - s++; - flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; + *sp = ++s; + if (s == send) { return flags; } @@ -832,7 +829,7 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) isALPHA_FOLD_EQ(*s, 's')) { /* "nanq" or "nans" are ok, though generating * these portably is tricky. */ - s++; + *sp = ++s; if (s == send) { return flags; } @@ -840,17 +837,14 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) if (*s == '(') { /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */ const char *t; + int trail = flags | IS_NUMBER_TRAILING; s++; - if (s == send) { - return flags | IS_NUMBER_TRAILING; - } + if (s == send) { return trail; } t = s + 1; while (t < send && *t && *t != ')') { t++; } - if (t == send) { - return flags | IS_NUMBER_TRAILING; - } + if (t == send) { return trail; } if (*t == ')') { int nantype; UV nanval; @@ -900,80 +894,78 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send) /* XXX Doesn't do octal: nan("0123"). * Probably not a big loss. */ + /* XXX the nanval is currently unused, that is, + * not inserted as the NaN payload of the NV. + * But the above code already parses the C99 + * nan(...) format. See below, and see also + * the nan() in POSIX.xs. + * + * Certain configuration combinations where + * NVSIZE is greater than UVSIZE mean that + * a single UV cannot contain all the possible + * NaN payload bits. There would need to be + * some more generic syntax than "nan($uv)". + * + * Issues to keep in mind: + * + * (1) In most common cases there would + * not be an integral number of bytes that + * could be set, only a certain number of bits. + * For example for the common case of + * NVSIZE == UVSIZE == 8 there is room for 52 + * bits in the payload, but the most significant + * bit is commonly reserved for the + * signaling/quiet bit, leaving 51 bits. + * Furthermore, the C99 nan() is supposed + * to generate quiet NaNs, so it is doubtful + * whether it should be able to generate + * signaling NaNs. For the x86 80-bit doubles + * (if building a long double Perl) there would + * be 62 bits (s/q bit being the 63rd). + * + * (2) Endianness of the payload bits. If the + * payload is specified as an UV, the low-order + * bits of the UV are naturally little-endianed + * (rightmost) bits of the payload. The endianness + * of UVs and NVs can be different. */ + if ((nantype & IS_NUMBER_NOT_INT) || !(nantype && IS_NUMBER_IN_UV)) { - /* XXX the nanval is currently unused, that is, - * not inserted as the NaN payload of the NV. - * But the above code already parses the C99 - * nan(...) format. See below, and see also - * the nan() in POSIX.xs. - * - * Certain configuration combinations where - * NVSIZE is greater than UVSIZE mean that - * a single UV cannot contain all the possible - * NaN payload bits. There would need to be - * some more generic syntax than "nan($uv)". - * - * Issues to keep in mind: - * - * (1) In most common cases there would - * not be an integral number of bytes that - * could be set, only a certain number of bits. - * For example for the common case of - * NVSIZE == UVSIZE == 8 there is room for 52 - * bits in the payload, but the most significant - * bit is commonly reserved for the - * signaling/quiet bit, leaving 51 bits. - * Furthermore, the C99 nan() is supposed - * to generate quiet NaNs, so it is doubtful - * whether it should be able to generate - * signaling NaNs. For the x86 80-bit doubles - * (if building a long double Perl) there would - * be 62 bits (s/q bit being the 63rd). - * - * (2) Endianness of the payload bits. If the - * payload is specified as an UV, the low-order - * bits of the UV are naturally little-endianed - * (rightmost) bits of the payload. The endianness - * of UVs and NVs can be different. */ - return 0; + /* treat "NaN(invalid)" the same as "NaNgarbage" */ + return trail; } - if (s < t) { - flags |= IS_NUMBER_TRAILING; + else { + *sp = t + 1; + return (s + 1 < t || t + 1 < send) ? trail : flags; } } else { /* Looked like nan(...), but no close paren. */ - flags |= IS_NUMBER_TRAILING; + return trail; } } else { while (s < send && isSPACE(*s)) s++; - if (s < send && *s) { - /* Note that we here implicitly accept (parse as - * "nan", but with warnings) also any other weird - * trailing stuff for "nan". In the above we just - * check that if we got the C99-style "nan(...)", - * the "..." looks sane. - * If in future we accept more ways of specifying - * the nan payload, the accepting would happen around - * here. */ - flags |= IS_NUMBER_TRAILING; - } + /* Note that we here implicitly accept (parse as + * "nan", but with warnings) also any other weird + * trailing stuff for "nan". In the above we just + * check that if we got the C99-style "nan(...)", + * the "..." looks sane. + * If in future we accept more ways of specifying + * the nan payload, the accepting would happen around + * here. */ + *sp = s; + return flags | (s < send ? IS_NUMBER_TRAILING : 0); } - s = send; } else return 0; } - while (s < send && isSPACE(*s)) - s++; - #else PERL_UNUSED_ARG(send); -#endif /* #if defined(NV_INF) || defined(NV_NAN) */ *sp = s; return flags; +#endif /* #if defined(NV_INF) || defined(NV_NAN) */ } /* @@ -1245,6 +1237,10 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags) /* Really detect inf/nan. Start at d, not s, since the above * code might have already consumed the "1." or "1". */ const int infnan = Perl_grok_infnan(aTHX_ &d, send); + + if ((infnan & IS_NUMBER_TRAILING) && !(flags & PERL_SCAN_TRAILING)) { + return 0; + } if ((infnan & IS_NUMBER_INFINITY)) { return (numtype | infnan); /* Keep sign for infinity. */ } @@ -1536,6 +1532,9 @@ S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value const char *p0 = negative ? s - 1 : s; const char *p = p0; const int infnan = grok_infnan(&p, send); + /* We act like PERL_SCAN_TRAILING here to permit trailing garbage, + * it is not clear if that is desirable. + */ if (infnan && p != p0) { /* If we can generate inf/nan directly, let's do so. */ #ifdef NV_INF @@ -1889,6 +1888,10 @@ Checks whether the argument would be either an infinity or C<NaN> when used as a number, but is careful not to trigger non-numeric or uninitialized warnings. it assumes the caller has done C<SvGETMAGIC(sv)> already. +Note that this always accepts trailing garbage (similar to C<grok_number_flags> +with C<PERL_SCAN_TRAILING>), so C<"inferior"> and C<"NAND gates"> will +return true. + =cut */ |