summaryrefslogtreecommitdiff
path: root/lib/encoding.pm
blob: 472a10a008c5295f89cc91808d4fa6f67dc043c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package encoding;

use Encode;

# import - invoked by "use encoding NAME" (or by a bare "use encoding").
#
# Arguments:
#   $class - the package the pragma was invoked on (not otherwise used).
#   $name  - name of the encoding to install; when the caller passes no
#            name at all, $ENV{PERL_ENCODING} is consulted instead.
#
# Effect: stores the object returned by Encode's find_encoding() in
# ${^ENCODING}.
#
# Croaks (via Carp) when no encoding name can be determined, or when
# find_encoding() does not recognise the name.
sub import {
    my ($class, $name) = @_;

    # A bare "use encoding;" falls back to the environment.
    $name = $ENV{PERL_ENCODING} if @_ < 2;

    # Without this guard a missing name would be interpolated into the
    # "Unknown encoding" message below, hiding the real cause.
    unless (defined $name && length $name) {
        require Carp;
        Carp::croak("encoding: no encoding specified "
                  . "(pass a name or set PERL_ENCODING)");
    }

    my $enc = find_encoding($name);
    unless (defined $enc) {
        require Carp;
        # Parenthesised call: Carp is only loaded at run time here, so the
        # call must not depend on croak being known when this file is parsed.
        Carp::croak("Unknown encoding '$name'");
    }

    ${^ENCODING} = $enc;
}

=pod

=head1 NAME

encoding - pragma to control the conversion of legacy data into Unicode

=head1 SYNOPSIS

    use encoding "iso 8859-7";

    $a = "\xDF";
    $b = "\x{100}";

    $c = $a . $b;

    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
    # The \xDF of ISO 8859-7 is \x{3af} in Unicode.

=head1 DESCRIPTION

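Normally, when legacy 8-bit data is converted to Unicode, the data is
expected to be Latin-1 (or EBCDIC on EBCDIC platforms).  With the
encoding pragma you can change this default.  If C<use encoding> is
given no encoding name, the encoding named by the C<PERL_ENCODING>
environment variable is used.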

The pragma applies per script; it is not a per-block lexical.  Only the
last C<use encoding> seen matters.

=head1 FUTURE POSSIBILITIES

The C<\x..> and C<\0...> in literals and regular expressions are not
affected by this pragma.  They probably should be.

=head1 SEE ALSO

L<perlunicode>

=cut

1;