diff options
author | Karl Williamson <khw@cpan.org> | 2019-10-02 17:13:31 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-10-06 11:07:09 -0600 |
commit | f4225fa0e24724a97c2ff1d4e608353ca1537506 (patch) | |
tree | 155bfb82927141727d133caa5dd1a43fc23a867f | |
parent | 38f458ffd56c0eb9f5df18cb6693ca326a4b1374 (diff) | |
download | perl-f4225fa0e24724a97c2ff1d4e608353ca1537506.tar.gz |
Make defn of UTF8_IS_CONTINUATION common
This can be derived from other values, removing an EBCDIC dependency
-rw-r--r-- | utf8.h | 12 | ||||
-rw-r--r-- | utfebcdic.h | 8 |
2 files changed, 6 insertions, 14 deletions
```diff
@@ -307,12 +307,6 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
 #define UTF8_IS_START(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
                           ((U8)((c) | 0)) >= 0xc2)

-/* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
- * first byte thereof? The |0 makes sure this isn't mistakenly called with a
- * ptr argument */
-#define UTF8_IS_CONTINUATION(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
-    (((U8)((c) | 0)) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK)
-
 /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence? Use
  * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
  * be well-formed. Masking with 0xfe allows the low bit to be 0 or 1; thus
@@ -363,6 +357,12 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
  * the underlying reason that B0 works here) */
 #define UTF_CONTINUATION_MARK (UTF_IS_CONTINUATION_MASK & 0xB0)

+/* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
+ * first byte thereof? */
+#define UTF8_IS_CONTINUATION(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
+            (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
+                                                == UTF_CONTINUATION_MARK)))
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index 7200599532..ad4df4544f 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -220,14 +220,6 @@ explicitly forbidden, and the shortest possible encoding should always be used

 #define UTF8_IS_START(c) _generic_isCC(c, _CC_UTF8_IS_START)

-#define UTF8_IS_CONTINUATION(c) _generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
-
-/* The above instead could be written as this:
-#define UTF8_IS_CONTINUATION(c) \
-    (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
-                                                == UTF_CONTINUATION_MARK)
- */
-
 /* Equivalent to ! UVCHR_IS_INVARIANT(c) */
 #define UTF8_IS_CONTINUED(c) cBOOL(FITS_IN_8_BITS(c) \
     && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))
```