diff options
author | Karl Williamson <khw@cpan.org> | 2019-10-02 18:08:32 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-10-06 11:07:09 -0600 |
commit | 2dc97505e86018c7ceba8c96fd84f477c8dd45d3 (patch) | |
tree | e24f92532a3fe40c623a6175e5ee9d3f08684e8a | |
parent | 7c88d61e18cab1244ecd155556e1f0b3563a7e4a (diff) | |
download | perl-2dc97505e86018c7ceba8c96fd84f477c8dd45d3.tar.gz |
Make defn of OFFUNI_IS_INVARIANT common
This can be derived from other values, removing an EBCDIC dependency
-rw-r--r-- | utf8.h | 13 | ||||
-rw-r--r-- | utfebcdic.h | 3 |
2 files changed, 5 insertions, 11 deletions
@@ -274,14 +274,6 @@ platforms. FF signals to use 13 bytes for the encoded character. This breaks
 the paradigm that the number of leading bits gives how many total bytes there
 are in the character.
 
-=cut
-*/
-
-/* Is the representation of the Unicode code point 'cp' the same regardless of
- * being encoded in UTF-8 or not? */
-#define OFFUNI_IS_INVARIANT(cp) isASCII(cp)
-
-/*
 =for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp
 
 Evaluates to 1 if the representation of code point C<cp> is the same whether or
@@ -342,6 +334,11 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
 (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
 == UTF_CONTINUATION_MARK)))
 
+/* Is the representation of the Unicode code point 'cp' the same regardless of
+ * being encoded in UTF-8 or not? This is a fundamental property of
+ * UTF-8,EBCDIC */
+#define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index d0cf139ff3..99a5bad5c3 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -202,9 +202,6 @@ possible to UTF-8-encode a single code point in different ways, but that is
 explicitly forbidden, and the shortest possible encoding should always be used
 (and that is what Perl does). */
 
-/* This is a fundamental property of UTF-EBCDIC */
-#define OFFUNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0)
-
 /* It turns out that on EBCDIC platforms, the invariants are the characters
  * that have ASCII equivalents, plus the C1 controls. Since the C0 controls
  * and DELETE are ASCII, this is the same as: (isASCII(uv) || isCNTRL_L1(uv))