diff options
Diffstat (limited to 'lib/encoding.pm')
-rw-r--r-- | lib/encoding.pm | 11 |
1 files changed, 9 insertions, 2 deletions
diff --git a/lib/encoding.pm b/lib/encoding.pm index e758687c59..be0fd73a00 100644 --- a/lib/encoding.pm +++ b/lib/encoding.pm @@ -23,13 +23,16 @@ encoding - pragma to control the conversion of legacy data into Unicode use encoding "iso 8859-7"; + # The \xDF of ISO 8859-7 is \x{3af} in Unicode. + $a = "\xDF"; $b = "\x{100}"; + printf "%#x\n", ord($a); # will print 0x3af, not 0xdf + $c = $a . $b; # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". - # The \xDF of ISO 8859-7 is \x{3af} in Unicode. =head1 DESCRIPTION @@ -40,6 +43,10 @@ encoding pragma you can change this default. The pragma is a per script, not a per block lexical. Only the last C<use encoding> matters, and it affects B<the whole script>. +If no encoding is specified, the environment variable L<perlrun/PERL_ENCODING> +is consulted. If no encoding can be found, an C<Unknown encoding '...'> +error will be thrown. + =head1 FUTURE POSSIBILITIES The C<\x..> and C<\0...> in regular expressions are not @@ -56,7 +63,7 @@ You should not need C<use utf8> for anything else these days =head1 SEE ALSO -L<perlunicode> +L<perlunicode>, L<Encode> =cut |