summaryrefslogtreecommitdiff
path: root/t/charset_tools.pl
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2019-04-03 12:06:47 -0600
committer Karl Williamson <khw@cpan.org> 2019-04-03 20:42:46 -0600
commit 05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc (patch)
tree 803edb6c4e526cbe87e2594a3f142810cf94de9a /t/charset_tools.pl
parent 83f3439aaa8b2e2319b22c3dea40b3d106dac084 (diff)
download perl-05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc.tar.gz
t/charset_tools.pl: Add comments
Diffstat (limited to 't/charset_tools.pl')
-rw-r--r-- t/charset_tools.pl 6
1 files changed, 6 insertions, 0 deletions
diff --git a/t/charset_tools.pl b/t/charset_tools.pl
index 6e88a37531..877cead0fa 100644
--- a/t/charset_tools.pl
+++ b/t/charset_tools.pl
@@ -164,6 +164,12 @@ for (my $i = 0; $i < 256; $i++) {
$native_to_i8[$i8_to_native[$i]] = $i;
}
+# Use these to convert to/from UTF-8 bytes.  I8 is the intermediate encoding
+# that is structured like UTF-8: it has start bytes, continuation bytes, and
+# invariant bytes.  UTF-EBCDIC is derived from I8 by a per-byte mapping.  For
+# example, I8 0x45 (an ASCII 'E') maps to 0xC5, because 0xC5 is 'E' in EBCDIC
+# and so must be an invariant there.  Consequently the I8 start byte 0xC5
+# cannot stay 0xC5 in UTF-EBCDIC, and is mapped to some other byte instead.
*I8_to_native = ($::IS_ASCII)
? sub { return shift }
: sub { return join "", map { chr $i8_to_native[ord $_] }