summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2019-10-02 17:07:50 -0600
committer Karl Williamson <khw@cpan.org> 2019-10-06 11:07:08 -0600
commit 38f458ffd56c0eb9f5df18cb6693ca326a4b1374 (patch)
tree ff10eba5cd482d48aae5bd1ff93a3ba92a7bb5e0
parent 9f3cfb7a26dab519dbc83ef02bd3fbf084cb6fc3 (diff)
download perl-38f458ffd56c0eb9f5df18cb6693ca326a4b1374.tar.gz
Make defn of UTF_CONTINUATION_MARK common
This can be derived from other values, removing an EBCDIC dependency
-rw-r--r-- utf8.h 10
-rw-r--r-- utfebcdic.h 2
2 files changed, 6 insertions, 6 deletions
diff --git a/utf8.h b/utf8.h
index 356c3e4c4d..dd4d1e1295 100644
--- a/utf8.h
+++ b/utf8.h
@@ -294,10 +294,6 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
#define UVCHR_IS_INVARIANT(cp) OFFUNI_IS_INVARIANT(cp)
-/* This defines the bits that are to be in the continuation bytes of a multi-byte
- * UTF-8 encoded character that mark it is a continuation byte. */
-#define UTF_CONTINUATION_MARK 0x80
-
/* Misleadingly named: is the UTF8-encoded byte 'c' part of a variant sequence
* in UTF-8? This is the inverse of UTF8_IS_INVARIANT. The |0 makes sure this
* isn't mistakenly called with a ptr argument */
@@ -361,6 +357,12 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
* E0 in UTF-EBCDIC */
#define UTF_IS_CONTINUATION_MASK ((U8) (0xFF << UTF_ACCUMULATION_SHIFT))
+/* This defines the bits that are to be in the continuation bytes of a
+ * multi-byte UTF-8 encoded character that mark it as a continuation byte.
+ * This turns out to be 0x80 in UTF-8, 0xA0 in UTF-EBCDIC. (khw doesn't know
+ * the underlying reason that 0xB0 works here) */
+#define UTF_CONTINUATION_MARK (UTF_IS_CONTINUATION_MASK & 0xB0)
+
/* Internal macro to be used only in this file to aid in constructing other
* publicly accessible macros.
* The number of bytes required to express this uv in UTF-8, for just those
diff --git a/utfebcdic.h b/utfebcdic.h
index f13f555f49..7200599532 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -245,9 +245,7 @@ explicitly forbidden, and the shortest possible encoding should always be used
#define isUTF8_POSSIBLY_PROBLEMATIC(c) \
_generic_isCC(c, _CC_UTF8_START_BYTE_IS_FOR_AT_LEAST_SURROGATE)
-#define UTF_CONTINUATION_MARK 0xA0
#define UTF_ACCUMULATION_SHIFT 5
-
/* ^? is defined to be APC on EBCDIC systems. See the definition of toCTRL()
* for more */
#define QUESTION_MARK_CTRL LATIN1_TO_NATIVE(0x9F)