diff options
author | Karl Williamson <khw@cpan.org> | 2019-04-03 12:06:47 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2019-04-03 20:42:46 -0600 |
commit | 05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc (patch) | |
tree | 803edb6c4e526cbe87e2594a3f142810cf94de9a /t/charset_tools.pl | |
parent | 83f3439aaa8b2e2319b22c3dea40b3d106dac084 (diff) | |
download | perl-05d49a9a9b7c8a3b3a1d1b4f1ce46db05b04dcbc.tar.gz |
t/charset_tools.pl: Add comments
Diffstat (limited to 't/charset_tools.pl')
-rw-r--r-- | t/charset_tools.pl | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
diff --git a/t/charset_tools.pl b/t/charset_tools.pl
index 6e88a37531..877cead0fa 100644
--- a/t/charset_tools.pl
+++ b/t/charset_tools.pl
@@ -164,6 +164,12 @@ for (my $i = 0; $i < 256; $i++) {
     $native_to_i8[$i8_to_native[$i]] = $i;
 }
 
+# Use these to convert to/from UTF-8 bytes. I8 is the encoding that
+# corresponds to UTF-8 with start bytes, continuation bytes, and invariant
+# bytes. UTF-EBCDIC is derived from this by a mapping which causes things
+# like the start byte C5 to map to something else, as C5 is actually an 'E' in
+# EBCDIC so can't be a real start byte, as it must be an invariant; and it
+# maps 0x45 (an ASCII 'E') to C5.
 *I8_to_native = ($::IS_ASCII)
                 ? sub { return shift }
                 : sub { return join "", map { chr $i8_to_native[ord $_] }
```