summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2019-10-06 10:50:49 -0600
committer Karl Williamson <khw@cpan.org> 2019-10-06 11:07:09 -0600
commit 2c03e801f9133bacfe39d2a12decdd9d2b3b075a (patch)
tree 10e0c37d317b734e27acf0b0125587d690b050d9 /utf8.h
parent ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f (diff)
download perl-2c03e801f9133bacfe39d2a12decdd9d2b3b075a.tar.gz
Make definition of UTF8_IS_CONTINUED common
This can be derived from other values, removing an EBCDIC dependency.
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 11
1 files changed, 5 insertions, 6 deletions
diff --git a/utf8.h b/utf8.h
index 70dd734651..889324e587 100644
--- a/utf8.h
+++ b/utf8.h
@@ -274,12 +274,6 @@ platforms. FF signals to use 13 bytes for the encoded character. This breaks
the paradigm that the number of leading bits gives how many total bytes there
are in the character. */
-/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
- * in UTF-8? This is the inverse of UTF8_IS_INVARIANT. The |0 makes sure this
- * isn't mistakenly called with a ptr argument */
-#define UTF8_IS_CONTINUED(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
- ((U8)((c) | 0)) & UTF_CONTINUATION_MARK)
-
/* This is the number of low-order bits a continuation byte in a UTF-8 encoded
* sequence contributes to the specification of the code point. In the bit
* maps above, you see that the first 2 bits are a constant '10', leaving 6 of
@@ -589,6 +583,11 @@ with a ptr argument.
* above show, doesn't matter as to its implementation */
#define NATIVE_BYTE_IS_INVARIANT(c) UVCHR_IS_INVARIANT(c)
+/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
+ * in UTF-8? This is the inverse of UTF8_IS_INVARIANT. */
+#define UTF8_IS_CONTINUED(c) (__ASSERT_(FITS_IN_8_BITS(c)) \
+ (! UTF8_IS_INVARIANT(c)))
+
/* The macros in the next 4 sets are used to generate the two utf8 or utfebcdic
* bytes from an ordinal that is known to fit into exactly two (not one) bytes;
* it must be less than 0x3FF to work across both encodings. */