Make definition of UVCHR_IS_INVARIANT common

This macro can be derived from other values (OFFUNI_IS_INVARIANT applied to NATIVE_TO_UNI), so the separate EBCDIC-specific definition in utfebcdic.h is removed.
author: Karl Williamson <khw@cpan.org> 2019-10-02 20:37:17 -0600
committer: Karl Williamson <khw@cpan.org> 2019-10-06 11:07:09 -0600
commit: ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f (patch)
tree: 16c1c3220b88f335af9a3d93b43815dcfe1c43ac
parent: 2dc97505e86018c7ceba8c96fd84f477c8dd45d3 (diff)
download: perl-ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f.tar.gz
2 files changed, 14 insertions, 24 deletions
diff --git a/utf8.h b/utf8.h
index 4cf52e45d2..70dd734651 100644
--- a/utf8.h
+++ b/utf8.h
@@ -272,19 +272,7 @@ Perl's extended UTF-8 means we can have start bytes up through FF, though any
 beginning with FF yields a code point that is too large for 32-bit ASCII
 platforms.  FF signals to use 13 bytes for the encoded character.  This breaks
 the paradigm that the number of leading bits gives how many total bytes there
-are in the character.
-
-=for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp
-
-Evaluates to 1 if the representation of code point C<cp> is the same whether or
-not it is encoded in UTF-8; otherwise evaluates to 0.  UTF-8 invariant
-characters can be copied as-is when converting to/from UTF-8, saving time.
-C<cp> is Unicode if above 255; otherwise is platform-native.
-
-=cut
- */
-
-#define UVCHR_IS_INVARIANT(cp)      OFFUNI_IS_INVARIANT(cp)
+are in the character. */
 
 /* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
  * in UTF-8?  This is the inverse of UTF8_IS_INVARIANT.  The |0 makes sure this
@@ -339,6 +327,18 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
  * UTF-8,EBCDIC */
 #define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
 
+/*
+=for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp
+
+Evaluates to 1 if the representation of code point C<cp> is the same whether or
+not it is encoded in UTF-8; otherwise evaluates to 0.  UTF-8 invariant
+characters can be copied as-is when converting to/from UTF-8, saving time.
+C<cp> is Unicode if above 255; otherwise is platform-native.
+
+=cut
+ */
+#define UVCHR_IS_INVARIANT(cp)  (OFFUNI_IS_INVARIANT(NATIVE_TO_UNI(cp)))
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index 99a5bad5c3..d52d54a43f 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -202,17 +202,7 @@ possible to UTF-8-encode a single code point in different ways, but that is
 explicitly forbidden, and the shortest possible encoding should always be used
 (and that is what Perl does). */
 
-/* It turns out that on EBCDIC platforms, the invariants are the characters
- * that have ASCII equivalents, plus the C1 controls.  Since the C0 controls
- * and DELETE are ASCII, this is the same as: (isASCII(uv) || isCNTRL_L1(uv))
- * */
-#define UVCHR_IS_INVARIANT(uv) cBOOL(FITS_IN_8_BITS(uv)                        \
-   && (PL_charclass[(U8) (uv)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))
-
-/* UTF-EBCDIC semantic macros - We used to transform back into I8 and then
- * compare, but now only have to do a single lookup by using a bit in
- * l1_char_class_tab.h.
- * Comments as to the meaning of each are given at their corresponding utf8.h
+/* Comments as to the meaning of each are given at their corresponding utf8.h
  * definitions. */
 /* Equivalent to ! UVCHR_IS_INVARIANT(c) */
 #define UTF8_IS_CONTINUED(c) 		cBOOL(FITS_IN_8_BITS(c)                 \
author	Karl Williamson <khw@cpan.org>	2019-10-02 20:37:17 -0600
committer	Karl Williamson <khw@cpan.org>	2019-10-06 11:07:09 -0600
commit	ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f (patch)
tree	16c1c3220b88f335af9a3d93b43815dcfe1c43ac
parent	2dc97505e86018c7ceba8c96fd84f477c8dd45d3 (diff)
download	perl-ab2e28c2f2b8f2edf930448a1c0182a8bd4f469f.tar.gz