Make definition of UTF8_IS_CONTINUATION common

The macro can be derived from other values, which removes the need for a separate EBCDIC-specific definition
author: Karl Williamson <khw@cpan.org> 2019-10-02 17:13:31 -0600
committer: Karl Williamson <khw@cpan.org> 2019-10-06 11:07:09 -0600
commit: f4225fa0e24724a97c2ff1d4e608353ca1537506 (patch)
tree: 155bfb82927141727d133caa5dd1a43fc23a867f
parent: 38f458ffd56c0eb9f5df18cb6693ca326a4b1374 (diff)
download: perl-f4225fa0e24724a97c2ff1d4e608353ca1537506.tar.gz
2 files changed, 6 insertions, 14 deletions
diff --git a/utf8.h b/utf8.h
index dd4d1e1295..acc76fcdef 100644
--- a/utf8.h
+++ b/utf8.h
@@ -307,12 +307,6 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
 #define UTF8_IS_START(c)      (__ASSERT_(FITS_IN_8_BITS(c))                 \
                                ((U8)((c) | 0)) >= 0xc2)
 
-/* Is the byte 'c' part of a multi-byte UTF8-8 encoded sequence, and not the
- * first byte thereof?  The |0 makes sure this isn't mistakenly called with a
- * ptr argument */
-#define UTF8_IS_CONTINUATION(c)     (__ASSERT_(FITS_IN_8_BITS(c))           \
-     (((U8)((c) | 0)) & UTF_IS_CONTINUATION_MASK) == UTF_CONTINUATION_MARK)
-
 /* Is the UTF8-encoded byte 'c' the first byte of a two byte sequence?  Use
  * UTF8_IS_NEXT_CHAR_DOWNGRADEABLE() instead if the input isn't known to
  * be well-formed.  Masking with 0xfe allows the low bit to be 0 or 1; thus
@@ -363,6 +357,12 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
  * the underlying reason that B0 works here) */
 #define UTF_CONTINUATION_MARK       (UTF_IS_CONTINUATION_MASK & 0xB0)
 
+/* Is the byte 'c' part of a multi-byte UTF-8 encoded sequence, and not the
+ * first byte thereof? */
+#define UTF8_IS_CONTINUATION(c)     (__ASSERT_(FITS_IN_8_BITS(c))           \
+            (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK)             \
+                                                == UTF_CONTINUATION_MARK)))
+
 /* Internal macro to be used only in this file to aid in constructing other
  * publicly accessible macros.
  * The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index 7200599532..ad4df4544f 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -220,14 +220,6 @@ explicitly forbidden, and the shortest possible encoding should always be used
 
 #define UTF8_IS_START(c)		_generic_isCC(c, _CC_UTF8_IS_START)
 
-#define UTF8_IS_CONTINUATION(c)		_generic_isCC(c, _CC_UTF8_IS_CONTINUATION)
-
-/* The above instead could be written as this:
-#define UTF8_IS_CONTINUATION(c)                                                 \
-            (((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK)                 \
-                                                == UTF_CONTINUATION_MARK)
- */
-
 /* Equivalent to ! UVCHR_IS_INVARIANT(c) */
 #define UTF8_IS_CONTINUED(c) 		cBOOL(FITS_IN_8_BITS(c)                 \
    && ! (PL_charclass[(U8) (c)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))
author	Karl Williamson <khw@cpan.org>	2019-10-02 17:13:31 -0600
committer	Karl Williamson <khw@cpan.org>	2019-10-06 11:07:09 -0600
commit	f4225fa0e24724a97c2ff1d4e608353ca1537506 (patch)
tree	155bfb82927141727d133caa5dd1a43fc23a867f
parent	38f458ffd56c0eb9f5df18cb6693ca326a4b1374 (diff)
download	perl-f4225fa0e24724a97c2ff1d4e608353ca1537506.tar.gz