summaryrefslogtreecommitdiff
path: root/utfebcdic.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-05-16 10:43:40 -0600
committer Karl Williamson <khw@cpan.org> 2015-09-04 10:21:18 -0600
commit 38953e5a3d6b2697bc45b2caa0f7d48e6f834f90 (patch)
tree a0a499c1c3fe7294631383aa08f69d244b68cc52 /utfebcdic.h
parent 6c88483ef08a161b7af0e63d36bb9bfe3376f65c (diff)
download perl-38953e5a3d6b2697bc45b2caa0f7d48e6f834f90.tar.gz
Change EBCDIC macro definition
Prior to this commit, UVCHR_IS_INVARIANT() had the same definition on both ASCII and EBCDIC platforms, but that definition expanded to different things on each. Now each platform defines it directly as what it expands to, and the EBCDIC version is changed so that, when fully expanded, it uses PL_charclass[] instead of PL_e2a[]. The new array is more likely to be in the memory cache.
Diffstat (limited to 'utfebcdic.h')
-rw-r--r-- utfebcdic.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index 1df7b3827f..c6c1d21d9b 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -172,6 +172,14 @@ END_EXTERN_C
#define UNI_IS_INVARIANT(c) (((UV)(c)) < 0xA0)  /* true when (UV) c is below 0xA0 */
+/* It turns out that on EBCDIC platforms, the invariants are the characters
+ * that have ASCII equivalents, plus the C1 controls.  Since the C0 controls
+ * and DELETE are ASCII, this is the same as: (isASCII(uv) || isCNTRL_L1(uv)).
+ * 'uv' appears twice in the expansion, so it must have no side effects. */
+#define UVCHR_IS_INVARIANT(uv) cBOOL(FITS_IN_8_BITS(uv) \
+ && (PL_charclass[(U8) (uv)] & (_CC_mask(_CC_ASCII) | _CC_mask(_CC_CNTRL))))
+
+
/* UTF-EBCDIC semantic macros - We used to transform back into I8 and then
* compare, but now only have to do a single lookup by using a bit in
* l1_char_class_tab.h.