diff options
-rw-r--r-- | utf8.h | 6 | ||||
-rw-r--r-- | utfebcdic.h | 9 |
2 files changed, 14 insertions, 1 deletion
@@ -221,9 +221,13 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
  * illegal overlong sequences that begin with C0 and C1. */
 #define UTF8_IS_START(c) (((U8)c) >= 0xc2)
 
+/* For use in UTF8_IS_CONTINUATION() below */
+#define UTF_IS_CONTINUATION_MASK 0xC0
+
 /* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
  * first byte thereof? */
-#define UTF8_IS_CONTINUATION(c) ((((U8)c) & 0xC0) == UTF_CONTINUATION_MARK)
+#define UTF8_IS_CONTINUATION(c) \
+    ((((U8)c) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK)
 
 /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use
  * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
diff --git a/utfebcdic.h b/utfebcdic.h
index 3a4fcc28af..10b666afe2 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -205,8 +205,17 @@ above what a 64 bit word can hold */
  * definitions. */
 
 #define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START)
+
+#define UTF_IS_CONTINUATION_MASK 0xE0
+
 #define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
 
+/* The above instead could be written as this:
+#define UTF8_IS_CONTINUATION(c) \
+    (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
+        == UTF_CONTINUATION_MARK)
+ */
+
 /* Equivalent to ! UVCHR_IS_INVARIANT(c) */
 #define UTF8_IS_CONTINUED(c) cBOOL(FITS_IN_8_BITS(c) \
     && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))