summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- utf8.h 7
-rw-r--r-- utfebcdic.h 8
2 files changed, 15 insertions, 0 deletions
diff --git a/utf8.h b/utf8.h
index 8fb8764411..29b052cd64 100644
--- a/utf8.h
+++ b/utf8.h
@@ -227,6 +227,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
*/
+/* Any UTF-8 string larger than this would overflow the word if converted into a UV */
+#if defined(UV_IS_QUAD)
+# define HIGHEST_REPRESENTABLE_UTF8 "\xFF\x80\x8F\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#else /* UV_IS_QUAD not defined: the shorter, 7-byte limit applies */
+# define HIGHEST_REPRESENTABLE_UTF8 "\xFE\x83\xBF\xBF\xBF\xBF\xBF"
+#endif /* defined(UV_IS_QUAD) */
+
/* Is the representation of the Unicode code point 'cp' the same regardless of
* being encoded in UTF-8 or not? */
#define OFFUNI_IS_INVARIANT(cp) isASCII(cp)
diff --git a/utfebcdic.h b/utfebcdic.h
index a6ba4fa6a3..227e0eb3db 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -268,6 +268,14 @@ explicitly forbidden, and the shortest possible encoding should always be used
* for more */
#define QUESTION_MARK_CTRL LATIN1_TO_NATIVE(0x9F)
+/* Any I8 string larger than this would overflow the word if converted into a UV */
+#if defined(UV_IS_QUAD)
+# define HIGHEST_REPRESENTABLE_UTF8 "\xFF\xAF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF\xBF"
+#else /* UV_IS_QUAD not defined: same 14-byte length, lower-sorting limit */
+# define HIGHEST_REPRESENTABLE_UTF8 "\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA3\xBF\xBF\xBF\xBF\xBF\xBF"
+#endif /* defined(UV_IS_QUAD) */
+
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/