utf8.h: Comments only

An earlier commit had split some comments up; this rejoins them and adds clarifying details.
author: Karl Williamson <khw@cpan.org> 2017-06-30 13:21:58 -0600
committer: Karl Williamson <khw@cpan.org> 2017-07-12 21:14:26 -0600
commit: c4e96019708f80aedf076564f0d2994581c027b9 (patch)
tree: 2460a35783646621d3ee023909ae1994d7c353fc /utf8.h
parent: 0a8a1a5b0c576b95f3c4a48a6912f86bcf34e281 (diff)
download: perl-c4e96019708f80aedf076564f0d2994581c027b9.tar.gz
1 files changed, 16 insertions, 9 deletions
diff --git a/utf8.h b/utf8.h
index 0f29817757..4d2d01b16c 100644
--- a/utf8.h
+++ b/utf8.h
@@ -767,18 +767,25 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 #define UTF8_GOT_SURROGATE		UTF8_DISALLOW_SURROGATE
 #define UTF8_WARN_SURROGATE		0x0200
 
-#define UTF8_DISALLOW_NONCHAR           0x0400	/* Unicode non-character */
+/* Unicode non-character code points */
+#define UTF8_DISALLOW_NONCHAR           0x0400
 #define UTF8_GOT_NONCHAR                UTF8_DISALLOW_NONCHAR
-#define UTF8_WARN_NONCHAR               0x0800	/*  code points */
+#define UTF8_WARN_NONCHAR               0x0800
 
-#define UTF8_DISALLOW_SUPER		0x1000	/* Super-set of Unicode: code */
+/* Super-set of Unicode: code points above the legal max */
+#define UTF8_DISALLOW_SUPER		0x1000
 #define UTF8_GOT_SUPER		        UTF8_DISALLOW_SUPER
-#define UTF8_WARN_SUPER		        0x2000	/* points above the legal max */
-
-/* Code points which never were part of the original UTF-8 standard, which only
- * went up to 2 ** 31 - 1.  Note that these all overflow a signed 32-bit word,
- * The first byte of these code points is FE or FF on ASCII platforms.  If the
- * first byte is FF, it will overflow a 32-bit word. */
+#define UTF8_WARN_SUPER		        0x2000
+
+/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
+ * 0xFF, though UTF-EBCDIC did.  This allowed both versions to represent code
+ * points up to 2 ** 31 - 1.  Perl extends UTF-8 so that 0xFE and 0xFF are
+ * usable on ASCII platforms, and 0xFF means something different than
+ * UTF-EBCDIC defines.  These changes allow code points of 64 bits (actually
+ * somewhat more) to be represented on both platforms.  But these are Perl
+ * extensions, and not likely to be interchangeable with other languages.  Note
+ * that on ASCII platforms, a code point whose start byte is 0xFE overflows a
+ * signed 32-bit word, and one whose start byte is 0xFF an unsigned one. */
 #define UTF8_DISALLOW_PERL_EXTENDED     0x4000
 #define UTF8_GOT_PERL_EXTENDED          UTF8_DISALLOW_PERL_EXTENDED
 #define UTF8_WARN_PERL_EXTENDED         0x8000
author	Karl Williamson <khw@cpan.org>	2017-06-30 13:21:58 -0600
committer	Karl Williamson <khw@cpan.org>	2017-07-12 21:14:26 -0600
commit	c4e96019708f80aedf076564f0d2994581c027b9 (patch)
tree	2460a35783646621d3ee023909ae1994d7c353fc /utf8.h
parent	0a8a1a5b0c576b95f3c4a48a6912f86bcf34e281 (diff)
download	perl-c4e96019708f80aedf076564f0d2994581c027b9.tar.gz