summary | refs | log | tree | commit | diff
path: root/utf8.h
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2019-10-02 18:08:32 -0600
committer: Karl Williamson <khw@cpan.org> 2019-10-06 11:07:09 -0600
commit: 2dc97505e86018c7ceba8c96fd84f477c8dd45d3 (patch)
tree: e24f92532a3fe40c623a6175e5ee9d3f08684e8a /utf8.h
parent: 7c88d61e18cab1244ecd155556e1f0b3563a7e4a (diff)
download: perl-2dc97505e86018c7ceba8c96fd84f477c8dd45d3.tar.gz
Make defn of OFFUNI_IS_INVARIANT common
This can be derived from other values, removing an EBCDIC dependency
Diffstat (limited to 'utf8.h')
-rw-r--r-- utf8.h 13
1 files changed, 5 insertions, 8 deletions
diff --git a/utf8.h b/utf8.h
index 2402c6fa72..4cf52e45d2 100644
--- a/utf8.h
+++ b/utf8.h
@@ -274,14 +274,6 @@ platforms. FF signals to use 13 bytes for the encoded character. This breaks
the paradigm that the number of leading bits gives how many total bytes there
are in the character.
-=cut
-*/
-
-/* Is the representation of the Unicode code point 'cp' the same regardless of
- * being encoded in UTF-8 or not? */
-#define OFFUNI_IS_INVARIANT(cp) isASCII(cp)
-
-/*
=for apidoc Am|bool|UVCHR_IS_INVARIANT|UV cp
Evaluates to 1 if the representation of code point C<cp> is the same whether or
@@ -342,6 +334,11 @@ C<cp> is Unicode if above 255; otherwise is platform-native.
(((NATIVE_UTF8_TO_I8(c) & UTF_IS_CONTINUATION_MASK) \
== UTF_CONTINUATION_MARK)))
+/* Is the representation of the Unicode code point 'c' the same regardless of
+ * being encoded in UTF-8 or not?  True when 'c' is below the continuation
+ * mark; this is a fundamental property of both UTF-8 and UTF-EBCDIC */
+#define OFFUNI_IS_INVARIANT(c) (((WIDEST_UTYPE)(c)) < UTF_CONTINUATION_MARK)
+
/* Internal macro to be used only in this file to aid in constructing other
* publicly accessible macros.
* The number of bytes required to express this uv in UTF-8, for just those