summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- utf8.h 6
-rw-r--r-- utfebcdic.h 9
2 files changed, 14 insertions, 1 deletions
diff --git a/utf8.h b/utf8.h
index aefbe376a5..c57576bd5c 100644
--- a/utf8.h
+++ b/utf8.h
@@ -221,9 +221,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
* illegal overlong sequences that begin with C0 and C1. */
#define UTF8_IS_START(c) (((U8)c) >= 0xc2)
+/* For use in UTF8_IS_CONTINUATION() below */
+#define UTF_IS_CONTINUATION_MASK 0xC0
+
/* Is the byte 'c' part of a multi-byte UTF-8 encoded sequence, and not the
* first byte thereof? */
-#define UTF8_IS_CONTINUATION(c) ((((U8)c) & 0xC0) == UTF_CONTINUATION_MARK)
+#define UTF8_IS_CONTINUATION(c) \
+ ((((U8)c) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK)
/* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use
* UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
diff --git a/utfebcdic.h b/utfebcdic.h
index 3a4fcc28af..10b666afe2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -205,8 +205,17 @@ above what a 64 bit word can hold */
* definitions. */
#define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START)
+
+#define UTF_IS_CONTINUATION_MASK 0xE0
+
#define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
+/* The above instead could be written as this:
+#define UTF8_IS_CONTINUATION(c) \
+ ((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
+ == UTF_CONTINUATION_MARK)
+ */
+
/* Equivalent to ! UVCHR_IS_INVARIANT(c) */
#define UTF8_IS_CONTINUED(c) cBOOL(FITS_IN_8_BITS(c) \
&& ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))