utf8.h: Correct some values for EBCDIC

It occurred to me that EBCDIC platforms have different maximums for the number of bytes a character can occupy. This moves the definition in utf8.h to within an #ifndef EBCDIC, and adds the correct values to utfebcdic.h.
author: Karl Williamson <public@khwilliamson.com> 2012-10-13 09:52:42 -0600
committer: Karl Williamson <public@khwilliamson.com> 2012-10-14 09:03:38 -0600
commit: 03c769848502ede1a551c8271a32945860d0d2fb (patch)
tree: 663e2e0299fa7ce84a2ad35f7b911f673cd9b40f /utfebcdic.h
parent: 1f327b5e67f741f6680b230ab89f99ab63fca5c4 (diff)
download: perl-03c769848502ede1a551c8271a32945860d0d2fb.tar.gz
1 files changed, 13 insertions, 0 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index 16621036e7..56ae0d230c 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -578,6 +578,19 @@ END_EXTERN_C
 #define UTF_CONTINUATION_MASK		((U8)0x1f)
 #define UTF_ACCUMULATION_SHIFT		5
 
+/* The maximum width, in bytes, of a single UTF-8 encoded character. */
+/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
+ * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
+ * expressed with 5 bytes.  However, Perl thinks of UTF-8 as a way to encode
+ * non-negative integers in a binary format, even those above that limit. */
+#define UTF8_MAXBYTES 7
+
+/* The maximum number of UTF-8 bytes a single Unicode character can
+ * uppercase/lowercase/fold into.  Unicode guarantees that the maximum
+ * expansion is 3 characters.  On EBCDIC platforms, the highest Unicode
+ * character occupies 5 bytes, therefore this number is 3 * 5 = 15. */
+#define UTF8_MAXBYTES_CASE	15
+
 /*
  * Local variables:
  * c-indentation-style: bsd
author	Karl Williamson <public@khwilliamson.com>	2012-10-13 09:52:42 -0600
committer	Karl Williamson <public@khwilliamson.com>	2012-10-14 09:03:38 -0600
commit	03c769848502ede1a551c8271a32945860d0d2fb (patch)
tree	663e2e0299fa7ce84a2ad35f7b911f673cd9b40f /utfebcdic.h
parent	1f327b5e67f741f6680b230ab89f99ab63fca5c4 (diff)
download	perl-03c769848502ede1a551c8271a32945860d0d2fb.tar.gz