summaryrefslogtreecommitdiff
path: root/handy.h
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-04-13 11:41:04 -0600
committerKarl Williamson <public@khwilliamson.com>2014-02-05 15:47:04 -0700
commit2bd1cbf6ef490552a1de7d86d43c05162e3e5e91 (patch)
tree900d97fa1a7662c381d473bdcdf3493a363a7d4c /handy.h
parent1a5eefe0dbec84fef092f54a1da3f267f6ac039d (diff)
downloadperl-2bd1cbf6ef490552a1de7d86d43c05162e3e5e91.tar.gz
handy.h Special case toCTRL('?') for EBCDIC
There is no change for ASCII platforms. For EBCDIC ones, toCTRL('?') and its inverse are special cased to map to/from the APC control character, which is the outlier control on these platforms. The reason to special case this is that otherwise toCTRL('?') would map to a graphic character, not a control. By outlier, I mean it is the one control not in the single block where all the other controls are placed. Further, it corresponds on two of the platforms with 0xFF, which would be an EBCDIC rub-out character corresponding to an ASCII rub-out (or DEL) 0x7F, which is what toCTRL('?') maps to on ASCII. This is an outlier control on ASCII, being a member of neither the C0 nor the C1 controls. Hence this makes '?' mean the outlier control on both platforms.
Diffstat (limited to 'handy.h')
-rw-r--r--handy.h22
1 files changed, 17 insertions, 5 deletions
diff --git a/handy.h b/handy.h
index d695cf5183..2f0132f572 100644
--- a/handy.h
+++ b/handy.h
@@ -1633,11 +1633,23 @@ EXTCONST U32 PL_charclass[];
/* Aliases: each isALNUMC name below expands directly to the isALPHANUMERIC
 * macro with the same suffix, passing the argument through unchanged. */
#define isALNUMC_utf8(p) isALPHANUMERIC_utf8(p)
#define isALNUMC_LC_utf8(p) isALPHANUMERIC_LC_utf8(p)
-/* This conversion works both ways, strangely enough. On EBCDIC platforms,
- * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII, except that they don't
- * necessarily mean the same characters, e.g. CTRL-D is 4 on both systems, but
- * that is EOT on ASCII; ST on EBCDIC */
-# define toCTRL(c) (toUPPER(NATIVE_TO_LATIN1(c)) ^ 64)
+/* On EBCDIC platforms, CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII,
+ * except that they don't necessarily mean the same characters, e.g. CTRL-D is
+ * 4 on both systems, but that is EOT on ASCII; ST on EBCDIC.
+ * '?' is special-cased on EBCDIC to APC, which is the control there that is
+ * the outlier from the block that contains the other controls, just like
+ * toCTRL('?') on ASCII yields DEL, the control that is the outlier from the C0
+ * block. If it weren't special cased, it would yield a non-control.
+ * The conversion works both ways, so CTRL('D') is 4, and CTRL(4) is D, etc. */
#ifndef EBCDIC
/* Non-EBCDIC build: upper-case the argument, then XOR the result with 64
 * (0x40). */
# define toCTRL(c) (toUPPER(c) ^ 64)
#else
/* EBCDIC build: '?' and LATIN1_TO_NATIVE(0x9F) are mapped to each other
 * explicitly.  Any other argument is upper-cased, passed through
 * NATIVE_TO_LATIN1, and then XORed with 64 (0x40).
 * The argument is expanded at least three times, so it must not have side
 * effects.
 * NOTE(review): the result of the general branch is not passed back through
 * LATIN1_TO_NATIVE; confirm that the reverse direction (control -> letter)
 * yields the intended native character on EBCDIC. */
# define toCTRL(c) ((c) == '?' \
 ? LATIN1_TO_NATIVE(0x9F) \
 : (c) == LATIN1_TO_NATIVE(0x9F) \
 ? '?' \
 : (NATIVE_TO_LATIN1(toUPPER(c)) ^ 64))
#endif
/* Type used for line numbers: a U32, i.e. unsigned and 32 bits wide. */
typedef U32 line_t;