summary refs log tree commit diff
path: root/utfebcdic.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-05-13 17:25:28 -0600
committer Karl Williamson <khw@cpan.org> 2015-08-01 10:34:50 -0600
commit 80bfb4dc0e64adaa83033faf88835c4c45ea7a90 (patch)
tree 37a2a93b9f322b659fcbce0577f73e1e20d44122 /utfebcdic.h
parent e4f4ef45276357ecb830d75dd7993d94fb64707a (diff)
download perl-80bfb4dc0e64adaa83033faf88835c4c45ea7a90.tar.gz
utfebcdic.h: Comments only
Diffstat (limited to 'utfebcdic.h')
-rw-r--r-- utfebcdic.h 5
1 files changed, 3 insertions, 2 deletions
diff --git a/utfebcdic.h b/utfebcdic.h
index 24101edf6f..61f5c6cff3 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -87,7 +87,8 @@
* aren't equivalent to ASCII characters nor C1 controls form the set of
* continuation bytes; the remaining 64 non-ASCII, non-control code points form
* the potential start bytes, in order. (However, the first 5 of these lead to
- * malformed overlongs, so there really are only 59 start bytes.) Hence the
+ * malformed overlongs, so there really are only 59 start bytes, and the first
+ * three of the 59 are the start bytes for the Latin1 range.) Hence the
* UTF-EBCDIC for the smallest variant code point, 0xA0, will likely have 0x41
* as its continuation byte, provided 0x41 isn't an ASCII or C1 equivalent.
* And its start byte will be the code point that is 37 (32+5) non-ASCII,
@@ -143,7 +144,7 @@ END_EXTERN_C
/*
The following table is adapted from tr16; it shows the I8 encoding of Unicode code points.
- Unicode Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
+ Unicode U32 Bit pattern 1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th byte
U+0000..U+007F 000000000xxxxxxx 0xxxxxxx
U+0080..U+009F 00000000100xxxxx 100xxxxx
U+00A0..U+03FF 000000yyyyyxxxxx 110yyyyy 101xxxxx