diff options
author | Karl Williamson <khw@cpan.org> | 2017-06-30 13:21:58 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2017-07-12 21:14:26 -0600 |
commit | c4e96019708f80aedf076564f0d2994581c027b9 (patch) | |
tree | 2460a35783646621d3ee023909ae1994d7c353fc /utf8.h | |
parent | 0a8a1a5b0c576b95f3c4a48a6912f86bcf34e281 (diff) | |
download | perl-c4e96019708f80aedf076564f0d2994581c027b9.tar.gz |
utf8.h: Comments only
An earlier commit had split some comments up. And this adds clarifying
details.
Diffstat (limited to 'utf8.h')
-rw-r--r-- | utf8.h | 25 |
1 files changed, 16 insertions, 9 deletions
@@ -767,18 +767,25 @@ case any call to string overloading updates the internal UTF-8 encoding flag.
 #define UTF8_GOT_SURROGATE UTF8_DISALLOW_SURROGATE
 #define UTF8_WARN_SURROGATE 0x0200
 
-#define UTF8_DISALLOW_NONCHAR 0x0400 /* Unicode non-character */
+/* Unicode non-character code points */
+#define UTF8_DISALLOW_NONCHAR 0x0400
 #define UTF8_GOT_NONCHAR UTF8_DISALLOW_NONCHAR
-#define UTF8_WARN_NONCHAR 0x0800 /* code points */
+#define UTF8_WARN_NONCHAR 0x0800
 
-#define UTF8_DISALLOW_SUPER 0x1000 /* Super-set of Unicode: code */
+/* Super-set of Unicode: code points above the legal max */
+#define UTF8_DISALLOW_SUPER 0x1000
 #define UTF8_GOT_SUPER UTF8_DISALLOW_SUPER
-#define UTF8_WARN_SUPER 0x2000 /* points above the legal max */
-
-/* Code points which never were part of the original UTF-8 standard, which only
- * went up to 2 ** 31 - 1. Note that these all overflow a signed 32-bit word,
- * The first byte of these code points is FE or FF on ASCII platforms. If the
- * first byte is FF, it will overflow a 32-bit word. */
+#define UTF8_WARN_SUPER 0x2000
+
+/* The original UTF-8 standard did not define UTF-8 with start bytes of 0xFE or
+ * 0xFF, though UTF-EBCDIC did. This allowed both versions to represent code
+ * points up to 2 ** 31 - 1. Perl extends UTF-8 so that 0xFE and 0xFF are
+ * usable on ASCII platforms, and 0xFF means something different than
+ * UTF-EBCDIC defines. These changes allow code points of 64 bits (actually
+ * somewhat more) to be represented on both platforms. But these are Perl
+ * extensions, and not likely to be interchangeable with other languages. Note
+ * that on ASCII platforms, FE overflows a signed 32-bit word, and FF an
+ * unsigned one. */
 #define UTF8_DISALLOW_PERL_EXTENDED 0x4000
 #define UTF8_GOT_PERL_EXTENDED UTF8_DISALLOW_PERL_EXTENDED
 #define UTF8_WARN_PERL_EXTENDED 0x8000