summaryrefslogtreecommitdiff
path: root/lib/locale.pm
blob: 02e4bb2434764e315ddb225caa7a3cdb19902f3b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package locale;

our $VERSION = '1.09';
use Config;

$Carp::Internal{ (__PACKAGE__) } = 1;

=head1 NAME

locale - Perl pragma to use or avoid POSIX locales for built-in operations

=head1 WARNING

DO NOT USE this pragma in scripts that have multiple
L<threads|threads> active.  The locale is not local to a single thread.
Another thread may change the locale at any time, which could cause at a
minimum that a given thread is operating in a locale it isn't expecting
to be in.  On some platforms, segfaults can also occur.  The locale
change need not be explicit; some operations cause perl to change the
locale itself.  You are vulnerable simply by having done a C<"use
locale">.

=head1 SYNOPSIS

    @x = sort @y;      # Native-platform/Unicode code point sort order
    {
        use locale;
        @x = sort @y;  # Locale-defined sort order
    }
    @x = sort @y;      # Native-platform/Unicode code point sort order
                       # again

=head1 DESCRIPTION

This pragma tells the compiler to enable (or disable) the use of POSIX
locales for built-in operations (for example, LC_CTYPE for regular
expressions, LC_COLLATE for string comparison, and LC_NUMERIC for number
formatting).  Each "use locale" or "no locale"
affects statements to the end of the enclosing BLOCK.

See L<perllocale> for more detailed information on how Perl supports
locales.

On systems that don't have locales, this pragma will cause your operations
to behave as if in the "C" locale; attempts to change the locale will fail.

=cut

# A separate bit is used for each of the two forms of the pragma, to save
# having to look at %^H for the normal case of a plain 'use locale' without an
# argument.

$locale::hint_bits = 0x4;
$locale::partial_hint_bits = 0x10;  # If pragma has an argument

# The pseudo-category :characters consists of 2 real ones; but it also is
# given its own number, -1, because in the complement form it also has the
# side effect of "use feature 'unicode_strings'"

sub import {
    shift;  # should be 'locale'; not checked

    $^H{locale} = 0 unless defined $^H{locale};
    if (! @_) { # If no parameter, use the plain form that changes all categories
        $^H |= $locale::hint_bits;

    }
    else {
        my @categories = ( qw(:ctype :collate :messages
                              :numeric :monetary :time) );
        for (my $i = 0; $i < @_; $i++) {
            my $arg = $_[$i];
            $complement = $arg =~ s/ : ( ! | not_ ) /:/x;
            if (! grep { $arg eq $_ } @categories, ":characters") {
                require Carp;
                Carp::croak("Unknown parameter '$_[$i]' to 'use locale'");
            }

            if ($complement) {
                if ($i != 0 || $i < @_ - 1)  {
                    require Carp;
                    Carp::croak("Only one argument to 'use locale' allowed"
                                . "if is $complement");
                }

                if ($arg eq ':characters') {
                    push @_, grep { $_ ne ':ctype' && $_ ne ':collate' }
                                  @categories;
                    # We add 1 to the category number;  This category number
                    # is -1
                    $^H{locale} |= (1 << 0);
                }
                else {
                    push @_, grep { $_ ne $arg } @categories;
                }
                next;
            }
            elsif ($arg eq ':characters') {
                push @_, ':ctype', ':collate';
                next;
            }

            $^H |= $locale::partial_hint_bits;

            # This form of the pragma overrides the other
            $^H &= ~$locale::hint_bits;

            $arg =~ s/^://;

            eval { require POSIX; import POSIX 'locale_h'; };

            # Map our names to the ones defined by POSIX
            my $LC = "LC_" . uc($arg);

            my $bit = eval "&POSIX::$LC";
            if (defined $bit) { # XXX Should we warn that this category isn't
                                # supported on this platform, or make it
                                # always be the C locale?

                # Verify our assumption.
                if (! ($bit >= 0 && $bit < 31)) {
                    require Carp;
                    Carp::croak("Cannot have ':$arg' parameter to 'use locale'"
                              . " on this platform.  Use the 'perlbug' utility"
                              . " to report this problem, or send email to"
                              . " 'perlbug\@perl.org'.  $LC=$bit");
                }

                # 1 is added so that the pseudo-category :characters, which is
                # -1, comes out 0.
                $^H{locale} |= 1 << ($bit + 1);
            }
        }
    }

}

sub unimport {
    $^H &= ~($locale::hint_bits|$locale::partial_hint_bits);
    $^H{locale} = 0;
}

1;