gh19010: fix returns for Perl_grok_infnan

Consistently honour what the docs have always promised: If an infinity or a not-a-number is recognized, C<*sp> will point to one byte past the end of the recognized string. If the recognition fails, zero is returned, and C<*sp> will not move. Additionally, restore Perl_grok_number_flags to allowing inf/nan with trailing garbage only when called with PERL_SCAN_TRAILING; add notes to the other two core callers to clarify that they always accept such trailing garbage. A small number of XS-APItest tests were modified to reflect the stricter behaviour: "Infin" and "nanx" are now invalid without PERL_SCAN_TRAILING.
author: Hugo van der Sanden <hv@crypt.org> 2021-08-15 15:34:12 +0100
committer: Karl Williamson <khw@cpan.org> 2022-03-04 10:31:41 -0700
commit: 7294e9f9f09f98d7bff4bfdae266a8f8121f907c (patch)
tree: efe6f789a51e63bb20ec5529a4567fb167d1bcd2 /numeric.c
parent: 98656496a12e0020bb1e4b971adc847ab7698c9d (diff)
download: perl-7294e9f9f09f98d7bff4bfdae266a8f8121f907c.tar.gz
1 files changed, 81 insertions, 78 deletions
diff --git a/numeric.c b/numeric.c
index a426d97fe5..d3f0d45fc4 100644
--- a/numeric.c
+++ b/numeric.c
@@ -771,15 +771,15 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
         s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
         s++; if (s == send) return 0;
         if (isALPHA_FOLD_EQ(*s, 'F')) {
-            s++;
+            flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
+            *sp = ++s;
             if (s < send && (isALPHA_FOLD_EQ(*s, 'I'))) {
-                int fail =
-                    flags | IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT | IS_NUMBER_TRAILING;
-                s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return fail;
-                s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return fail;
-                s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return fail;
-                s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return fail;
-                s++;
+                int trail = flags | IS_NUMBER_TRAILING;
+                s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return trail;
+                s++; if (s == send || isALPHA_FOLD_NE(*s, 'I')) return trail;
+                s++; if (s == send || isALPHA_FOLD_NE(*s, 'T')) return trail;
+                s++; if (s == send || isALPHA_FOLD_NE(*s, 'Y')) return trail;
+                *sp = ++s;
             } else if (odh) {
                 while (s < send && *s == '0') { /* 1.#INF00 */
                     s++;
@@ -787,10 +787,8 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
             }
             while (s < send && isSPACE(*s))
                 s++;
-            if (s < send && *s) {
-                flags |= IS_NUMBER_TRAILING;
-            }
-            flags |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
+            *sp = s;
+            return flags | (s < send ? IS_NUMBER_TRAILING : 0);
         }
         else if (isALPHA_FOLD_EQ(*s, 'D') && odh) { /* 1.#IND */
             s++;
@@ -798,9 +796,8 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
             while (s < send && *s == '0') { /* 1.#IND00 */
                 s++;
             }
-            if (s < send && *s) {
-                flags |= IS_NUMBER_TRAILING;
-            }
+            *sp = s;
+            return flags | (s < send ? IS_NUMBER_TRAILING : 0);
         } else
             return 0;
     }
@@ -816,9 +813,9 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
         if (isALPHA_FOLD_EQ(*s, 'N')) {
             s++; if (s == send || isALPHA_FOLD_NE(*s, 'A')) return 0;
             s++; if (s == send || isALPHA_FOLD_NE(*s, 'N')) return 0;
-            s++;
-
             flags |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
+            *sp = ++s;
+
             if (s == send) {
                 return flags;
             }
@@ -832,7 +829,7 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
                 isALPHA_FOLD_EQ(*s, 's')) {
                 /* "nanq" or "nans" are ok, though generating
                  * these portably is tricky. */
-                s++;
+                *sp = ++s;
                 if (s == send) {
                     return flags;
                 }
@@ -840,17 +837,14 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
             if (*s == '(') {
                 /* C99 style "nan(123)" or Perlish equivalent "nan($uv)". */
                 const char *t;
+                int trail = flags | IS_NUMBER_TRAILING;
                 s++;
-                if (s == send) {
-                    return flags | IS_NUMBER_TRAILING;
-                }
+                if (s == send) { return trail; }
                 t = s + 1;
                 while (t < send && *t && *t != ')') {
                     t++;
                 }
-                if (t == send) {
-                    return flags | IS_NUMBER_TRAILING;
-                }
+                if (t == send) { return trail; }
                 if (*t == ')') {
                     int nantype;
                     UV nanval;
@@ -900,80 +894,78 @@ Perl_grok_infnan(pTHX_ const char** sp, const char* send)
                     /* XXX Doesn't do octal: nan("0123").
                      * Probably not a big loss. */
 
+                    /* XXX the nanval is currently unused, that is,
+                     * not inserted as the NaN payload of the NV.
+                     * But the above code already parses the C99
+                     * nan(...)  format.  See below, and see also
+                     * the nan() in POSIX.xs.
+                     *
+                     * Certain configuration combinations where
+                     * NVSIZE is greater than UVSIZE mean that
+                     * a single UV cannot contain all the possible
+                     * NaN payload bits.  There would need to be
+                     * some more generic syntax than "nan($uv)".
+                     *
+                     * Issues to keep in mind:
+                     *
+                     * (1) In most common cases there would
+                     * not be an integral number of bytes that
+                     * could be set, only a certain number of bits.
+                     * For example for the common case of
+                     * NVSIZE == UVSIZE == 8 there is room for 52
+                     * bits in the payload, but the most significant
+                     * bit is commonly reserved for the
+                     * signaling/quiet bit, leaving 51 bits.
+                     * Furthermore, the C99 nan() is supposed
+                     * to generate quiet NaNs, so it is doubtful
+                     * whether it should be able to generate
+                     * signaling NaNs.  For the x86 80-bit doubles
+                     * (if building a long double Perl) there would
+                     * be 62 bits (s/q bit being the 63rd).
+                     *
+                     * (2) Endianness of the payload bits. If the
+                     * payload is specified as an UV, the low-order
+                     * bits of the UV are naturally little-endianed
+                     * (rightmost) bits of the payload.  The endianness
+                     * of UVs and NVs can be different. */
+
                     if ((nantype & IS_NUMBER_NOT_INT) ||
                         !(nantype && IS_NUMBER_IN_UV)) {
-                        /* XXX the nanval is currently unused, that is,
-                         * not inserted as the NaN payload of the NV.
-                         * But the above code already parses the C99
-                         * nan(...)  format.  See below, and see also
-                         * the nan() in POSIX.xs.
-                         *
-                         * Certain configuration combinations where
-                         * NVSIZE is greater than UVSIZE mean that
-                         * a single UV cannot contain all the possible
-                         * NaN payload bits.  There would need to be
-                         * some more generic syntax than "nan($uv)".
-                         *
-                         * Issues to keep in mind:
-                         *
-                         * (1) In most common cases there would
-                         * not be an integral number of bytes that
-                         * could be set, only a certain number of bits.
-                         * For example for the common case of
-                         * NVSIZE == UVSIZE == 8 there is room for 52
-                         * bits in the payload, but the most significant
-                         * bit is commonly reserved for the
-                         * signaling/quiet bit, leaving 51 bits.
-                         * Furthermore, the C99 nan() is supposed
-                         * to generate quiet NaNs, so it is doubtful
-                         * whether it should be able to generate
-                         * signaling NaNs.  For the x86 80-bit doubles
-                         * (if building a long double Perl) there would
-                         * be 62 bits (s/q bit being the 63rd).
-                         *
-                         * (2) Endianness of the payload bits. If the
-                         * payload is specified as an UV, the low-order
-                         * bits of the UV are naturally little-endianed
-                         * (rightmost) bits of the payload.  The endianness
-                         * of UVs and NVs can be different. */
-                        return 0;
+                        /* treat "NaN(invalid)" the same as "NaNgarbage" */
+                        return trail;
                     }
-                    if (s < t) {
-                        flags |= IS_NUMBER_TRAILING;
+                    else {
+                        *sp = t + 1;
+                        return (s + 1 < t || t + 1 < send) ? trail : flags;
                     }
                 } else {
                     /* Looked like nan(...), but no close paren. */
-                    flags |= IS_NUMBER_TRAILING;
+                    return trail;
                 }
             } else {
                 while (s < send && isSPACE(*s))
                     s++;
-                if (s < send && *s) {
-                    /* Note that we here implicitly accept (parse as
-                     * "nan", but with warnings) also any other weird
-                     * trailing stuff for "nan".  In the above we just
-                     * check that if we got the C99-style "nan(...)",
-                     * the "..."  looks sane.
-                     * If in future we accept more ways of specifying
-                     * the nan payload, the accepting would happen around
-                     * here. */
-                    flags |= IS_NUMBER_TRAILING;
-                }
+                /* Note that we here implicitly accept (parse as
+                 * "nan", but with warnings) also any other weird
+                 * trailing stuff for "nan".  In the above we just
+                 * check that if we got the C99-style "nan(...)",
+                 * the "..."  looks sane.
+                 * If in future we accept more ways of specifying
+                 * the nan payload, the accepting would happen around
+                 * here. */
+                *sp = s;
+                return flags | (s < send ? IS_NUMBER_TRAILING : 0);
             }
-            s = send;
         }
         else
             return 0;
     }
 
-    while (s < send && isSPACE(*s))
-        s++;
-
 #else
     PERL_UNUSED_ARG(send);
-#endif /* #if defined(NV_INF) || defined(NV_NAN) */
     *sp = s;
     return flags;
+#endif /* #if defined(NV_INF) || defined(NV_NAN) */
 }
 
 /*
@@ -1245,6 +1237,10 @@ Perl_grok_number_flags(pTHX_ const char *pv, STRLEN len, UV *valuep, U32 flags)
       /* Really detect inf/nan. Start at d, not s, since the above
        * code might have already consumed the "1." or "1". */
       const int infnan = Perl_grok_infnan(aTHX_ &d, send);
+
+      if ((infnan & IS_NUMBER_TRAILING) && !(flags & PERL_SCAN_TRAILING)) {
+          return 0;
+      }
       if ((infnan & IS_NUMBER_INFINITY)) {
           return (numtype | infnan); /* Keep sign for infinity. */
       }
@@ -1536,6 +1532,9 @@ S_my_atof_infnan(pTHX_ const char* s, bool negative, const char* send, NV* value
     const char *p0 = negative ? s - 1 : s;
     const char *p = p0;
     const int infnan = grok_infnan(&p, send);
+    /* We act like PERL_SCAN_TRAILING here to permit trailing garbage,
+     * it is not clear if that is desirable.
+     */
     if (infnan && p != p0) {
         /* If we can generate inf/nan directly, let's do so. */
 #ifdef NV_INF
@@ -1889,6 +1888,10 @@ Checks whether the argument would be either an infinity or C<NaN> when used
 as a number, but is careful not to trigger non-numeric or uninitialized
 warnings.  it assumes the caller has done C<SvGETMAGIC(sv)> already.
 
+Note that this always accepts trailing garbage (similar to C<grok_number_flags>
+with C<PERL_SCAN_TRAILING>), so C<"inferior"> and C<"NAND gates"> will
+return true.
+
 =cut
 */
author	Hugo van der Sanden <hv@crypt.org>	2021-08-15 15:34:12 +0100
committer	Karl Williamson <khw@cpan.org>	2022-03-04 10:31:41 -0700
commit	7294e9f9f09f98d7bff4bfdae266a8f8121f907c (patch)
tree	efe6f789a51e63bb20ec5529a4567fb167d1bcd2 /numeric.c
parent	98656496a12e0020bb1e4b971adc847ab7698c9d (diff)
download	perl-7294e9f9f09f98d7bff4bfdae266a8f8121f907c.tar.gz