t/charset_tools.pl: Add comments

author: Karl Williamson <khw@cpan.org> 2019-04-03 12:06:47 -0600
committer: Karl Williamson <khw@cpan.org> 2019-04-03 20:42:46 -0600
commit: 05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc (patch)
tree: 803edb6c4e526cbe87e2594a3f142810cf94de9a /t/charset_tools.pl
parent: 83f3439aaa8b2e2319b22c3dea40b3d106dac084 (diff)
download: perl-05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc.tar.gz
1 files changed, 6 insertions, 0 deletions
diff --git a/t/charset_tools.pl b/t/charset_tools.pl
index 6e88a37531..877cead0fa 100644
--- a/t/charset_tools.pl
+++ b/t/charset_tools.pl
@@ -164,6 +164,12 @@ for (my $i = 0; $i < 256; $i++) {
     $native_to_i8[$i8_to_native[$i]] = $i;
 }
 
+# Use these to convert to/from UTF-8 bytes.  I8 is the encoding that
+# corresponds to UTF-8, with start bytes, continuation bytes, and invariant
+# bytes.  UTF-EBCDIC is derived from I8 by a byte-to-byte mapping.  For
+# example, 0xC5 is a start byte in I8, but in EBCDIC 0xC5 is an 'E', which
+# must be an invariant and so can't be a real start byte.  The mapping
+# therefore sends I8 0x45 (an ASCII 'E') to 0xC5, and I8 0xC5 elsewhere.
 *I8_to_native = ($::IS_ASCII)
                 ? sub { return shift }
                 : sub { return join "", map { chr $i8_to_native[ord $_] }
author	Karl Williamson <khw@cpan.org>	2019-04-03 12:06:47 -0600
committer	Karl Williamson <khw@cpan.org>	2019-04-03 20:42:46 -0600
commit	05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc (patch)
tree	803edb6c4e526cbe87e2594a3f142810cf94de9a /t/charset_tools.pl
parent	83f3439aaa8b2e2319b22c3dea40b3d106dac084 (diff)
download	perl-05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc.tar.gz