diff options
author | Karl Williamson <khw@cpan.org> | 2019-10-06 10:50:49 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-10-06 11:07:09 -0600 |
commit | 2c03e801f9133bacfe39d2a12decdd9d2b3b075a (patch) | |
tree | 10e0c37d317b734e27acf0b0125587d690b050d9 /utf8.h | |
parent | ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f (diff) | |
download | perl-2c03e801f9133bacfe39d2a12decdd9d2b3b075a.tar.gz |
Make defn of UTF8_IS_CONTINUED common
This can be derived from other values, removing an EBCDIC dependency
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 11 |
1 files changed, 5 insertions, 6 deletions
@@ -274,12 +274,6 @@ platforms. FF signals to use 13 bytes for the encoded character. This breaks the paradigm that the number of leading bits gives how many total bytes there are in the character. */
-/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
- * in UTF-8? This is the inverse of UTF8_IS_INVARIANT. The |0 makes sure this
- * isn't mistakenly called with a ptr argument */
-#define UTF8_IS_CONTINUED(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
- ((U8)((c) | 0)) & UTF_CONTINUATION_MARK)
-
 /* This is the number of low-order bits a continuation byte in a UTF-8 encoded
  * sequence contributes to the specification of the code point. In the bit
  * maps above, you see that the first 2 bits are a constant '10', leaving 6 of
@@ -589,6 +583,11 @@ with a ptr argument.
  * above show, doesn't matter as to its implementation */
 #define NATIVE_BYTE_IS_INVARIANT(c) UVCHR_IS_INVARIANT(c)
+/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
+ * in UTF-8? This is the inverse of UTF8_IS_INVARIANT. */
+#define UTF8_IS_CONTINUED(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
+ (! UTF8_IS_INVARIANT(c)))
+
 /* The macros in the next 4 sets are used to generate the two utf8 or utfebcdic
  * bytes from an ordinal that is known to fit into exactly two (not one) bytes;
  * it must be less than 0x3FF to work across both encodings. */ |