summaryrefslogtreecommitdiff
path: root/lib/locale.pm
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-01-18 09:35:52 -0700
committerKarl Williamson <public@khwilliamson.com>2012-01-21 10:02:55 -0700
commit66cbab2c91fca8c9abc65a7231a053898208efe3 (patch)
treecb9e838d32b251f9f52082d29bb7009f074d192f /lib/locale.pm
parente439cacbc5a93fb9e6c524e31ac41772af51dfa0 (diff)
downloadperl-66cbab2c91fca8c9abc65a7231a053898208efe3.tar.gz
Add :not_characters parameter to 'use locale'
This adds the parameter handling, tests, and documentation for this new feature which allows locale and Unicode to play well with each other.
Diffstat (limited to 'lib/locale.pm')
-rw-r--r--lib/locale.pm49
1 files changed, 47 insertions, 2 deletions
diff --git a/lib/locale.pm b/lib/locale.pm
index 2398599595..e57a5fded2 100644
--- a/lib/locale.pm
+++ b/lib/locale.pm
@@ -2,6 +2,8 @@ package locale;
our $VERSION = '1.01';
# Register this package in %Carp::Internal so that, per Carp's documentation,
# errors croaked from here are not reported as coming from a line inside it.
+$Carp::Internal{ (__PACKAGE__) } = 1;
+
=head1 NAME
locale - Perl pragma to use or avoid POSIX locales for built-in operations
@@ -23,19 +25,62 @@ expressions, LC_COLLATE for string comparison, and LC_NUMERIC for number
formatting). Each "use locale" or "no locale"
affects statements to the end of the enclosing BLOCK.
+Starting in Perl 5.16, a hybrid mode for this pragma is available,
+
+ use locale ':not_characters';
+
+which enables only the portions of locales that don't affect the character
+set (that is, all except LC_COLLATE and LC_CTYPE). This is useful when mixing
+Unicode and locales, including UTF-8 locales.
+
+ use locale ':not_characters';
+ use open ":locale"; # Convert I/O to/from Unicode
+ use POSIX qw(locale_h); # Import the LC_ALL constant
+ setlocale(LC_ALL, ""); # Required for the next statement
+ # to take effect
+ printf "%.2f\n", 12345.67; # Locale-defined formatting
+ @x = sort @y; # Unicode-defined sorting order.
+ # (Note that you will get better
+ # results using Unicode::Collate.)
+
See L<perllocale> for more detailed information on how Perl supports
locales.
=cut
+# A separate bit in $^H is used for each of the two forms of the pragma, as
+# they are mostly independent, yet interact with each other and with the
+# unicode_strings feature.  This allows for fast determination of which
+# one(s) of the three are to be used at any given point, and no code has to
+# be written to deal with coming in and out of scopes--it falls automatically
+# out from the hint handling.
+
$locale::hint_bits = 0x4;
+$locale::not_chars_hint_bits = 0x10;
# Handler invoked for 'use locale' and 'use locale ":not_characters"'.
# (In this diff the '-' line is the previous one-statement body and the '+'
# lines are its replacement.)
#
# The first argument (the package name) is discarded unchecked.  Each
# remaining defined argument must be the string ":not_characters"; anything
# else croaks with "Unknown parameter ...".  Argument processing stops at the
# first undefined argument.
#
# Effect on $^H:
#   ":not_characters" given - sets $locale::not_chars_hint_bits and clears
#                             $locale::hint_bits
#   no arguments            - sets $locale::hint_bits only
sub import {
- $^H |= $locale::hint_bits;
+ shift; # should be 'locale'; not checked
+ my $found_not_chars = 0;
+ while (defined (my $arg = shift)) {
+ if ($arg eq ":not_characters") {
+ $^H |= $locale::not_chars_hint_bits;
+
+ # This form of the pragma overrides the other
+ $^H &= ~$locale::hint_bits;
+ $found_not_chars = 1;
+ }
+ else {
+ require Carp;
+ Carp::croak("Unknown parameter '$arg' to 'use locale'");
+ }
+ }
+
+ # Use the plain form if not doing the :not_characters one.
+ $^H |= $locale::hint_bits unless $found_not_chars;
}
# Handler invoked for 'no locale'.  Arguments are ignored.  The '+' line
# clears both $locale::hint_bits and $locale::not_chars_hint_bits from $^H;
# the '-' line it replaces cleared only $locale::hint_bits.
sub unimport {
- $^H &= ~$locale::hint_bits;
+ $^H &= ~($locale::hint_bits|$locale::not_chars_hint_bits);
}
1;