5 files changed, 57 insertions, 19 deletions
diff --git a/embed.fnc b/embed.fnc
index 5cc73b7978..d6312dcb1e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1683,6 +1683,7 @@ ApdD	|UV	|to_utf8_case	|NN const U8 *p					\
 #if defined(PERL_IN_UTF8_C)
 inRP	|bool	|does_utf8_overflow|NN const U8 * const s|NN const U8 * e
 inRP	|bool	|is_utf8_overlong_given_start_byte_ok|NN const U8 * const s|const STRLEN len
+inRP	|bool	|isFF_OVERLONG	|NN const U8 * const s|const STRLEN len
 sMR	|char *	|unexpected_non_continuation_text			\
 		|NN const U8 * const s					\
 		|STRLEN print_len					\
diff --git a/embed.h b/embed.h
index 1af29175cb..b8ee773505 100644
--- a/embed.h
+++ b/embed.h
@@ -1832,6 +1832,7 @@
 #define _to_utf8_case(a,b,c,d,e,f,g)	S__to_utf8_case(aTHX_ a,b,c,d,e,f,g)
 #define check_locale_boundary_crossing(a,b,c,d)	S_check_locale_boundary_crossing(aTHX_ a,b,c,d)
 #define does_utf8_overflow	S_does_utf8_overflow
+#define isFF_OVERLONG		S_isFF_OVERLONG
 #define is_utf8_common(a,b,c,d)	S_is_utf8_common(aTHX_ a,b,c,d)
 #define is_utf8_cp_above_31_bits	S_is_utf8_cp_above_31_bits
 #define is_utf8_overlong_given_start_byte_ok	S_is_utf8_overlong_given_start_byte_ok
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index 37a1e32185..121c6efac5 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -1731,6 +1731,15 @@ my @tests = (
         'utf8', 0x80000000, (isASCII) ? 7 : $max_bytes,
         nonportable_regex(0x80000000)
     ],
+    [ "highest 32 bit code point",
+        (isASCII)
+         ? "\xfe\x83\xbf\xbf\xbf\xbf\xbf"
+         : I8_to_native("\xff\xa0\xa0\xa0\xa0\xa0\xa0\xa3\xbf\xbf\xbf\xbf\xbf\xbf"),
+        $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT,
+        $UTF8_GOT_ABOVE_31_BIT,
+        'utf8', 0xFFFFFFFF, (isASCII) ? 7 : $max_bytes,
+        nonportable_regex(0xffffffff)
+    ],
     [ "requires at least 32 bits, and use SUPER-type flags, instead of ABOVE_31_BIT",
         (isASCII)
          ? "\xfe\x82\x80\x80\x80\x80\x80"
@@ -1764,7 +1773,20 @@ my @tests = (
     ],
 );
 
-if ($is64bit) {
+if (! $is64bit) {
+    if (isASCII) {
+        no warnings qw{portable overflow};
+        push @tests,
+            [ "Lowest 33 bit code point: overflow",
+                "\xFE\x84\x80\x80\x80\x80\x80",
+                $UTF8_WARN_ABOVE_31_BIT, $UTF8_DISALLOW_ABOVE_31_BIT,
+                $UTF8_GOT_ABOVE_31_BIT,
+                'utf8', 0x100000000, 7,
+                qr/and( is)? not portable/
+            ];
+    }
+}
+else {
     no warnings qw{portable overflow};
     push @tests,
         [ "More than 32 bits",
diff --git a/proto.h b/proto.h
index 1d79c46e2e..670801698d 100644
--- a/proto.h
+++ b/proto.h
@@ -5605,6 +5605,12 @@ PERL_STATIC_INLINE bool	S_does_utf8_overflow(const U8 * const s, const U8 * e)
 #define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW	\
 	assert(s); assert(e)
 
+PERL_STATIC_INLINE bool	S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
+			__attribute__warn_unused_result__
+			__attribute__pure__;
+#define PERL_ARGS_ASSERT_ISFF_OVERLONG	\
+	assert(s)
+
 PERL_STATIC_INLINE bool	S_is_utf8_common(pTHX_ const U8 *const p, SV **swash, const char * const swashname, SV* const invlist)
 			__attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_IS_UTF8_COMMON	\
diff --git a/utf8.c b/utf8.c
index 80bafadea5..8b301b26f8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -442,7 +442,20 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e)
      * that could result in a non-overflowing code point */
 
     PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW;
-    assert(s + UTF8SKIP(s) >= e);
+    assert(s <= e && s + UTF8SKIP(s) >= e);
+
+#if ! defined(UV_IS_QUAD) && ! defined(EBCDIC)
+
+    /* On 32 bit ASCII machines, many overlongs that start with FF don't
+     * overflow */
+
+    if (isFF_OVERLONG(s, e - s)) {
+        const U8 max_32_bit_overlong[] = "\xFF\x80\x80\x80\x80\x80\x80\x84";
+        return memGE(s, max_32_bit_overlong,
+                                    MIN(e - s, sizeof(max_32_bit_overlong)));
+    }
+
+#endif
 
     for (x = s; x < e; x++, y++) {
 
@@ -521,27 +534,22 @@ S_is_utf8_overlong_given_start_byte_ok(const U8 * const s, const STRLEN len)
         return TRUE;
     }
 
-#   if defined(UV_IS_QUAD) || defined(EBCDIC)
+    /* Check for the FF overlong */
+    return isFF_OVERLONG(s, len);
+}
+
+PERL_STATIC_INLINE bool
+S_isFF_OVERLONG(const U8 * const s, const STRLEN len)
+{
+    PERL_ARGS_ASSERT_ISFF_OVERLONG;
 
     /* Check for the FF overlong.  This happens only if all these bytes match;
      * what comes after them doesn't matter.  See tables in utf8.h,
-     * utfebcdic.h.  (Can't happen on ASCII 32-bit platforms, as overflows
-     * instead.) */
-
-    if (   len >= sizeof(FF_OVERLONG_PREFIX) - 1
-        && UNLIKELY(memEQ(s, FF_OVERLONG_PREFIX,
-                                            sizeof(FF_OVERLONG_PREFIX) - 1)))
-    {
-        return TRUE;
-    }
-
-#else
-
-    PERL_UNUSED_ARG(len);
-
-#endif
+     * utfebcdic.h. */
 
-    return FALSE;
+    return    len >= sizeof(FF_OVERLONG_PREFIX) - 1
+           && UNLIKELY(memEQ(s, FF_OVERLONG_PREFIX,
+                                            sizeof(FF_OVERLONG_PREFIX) - 1));
 }
 
 #undef F0_ABOVE_OVERLONG