summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-07-09 14:10:07 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-07-09 14:10:07 +0000
commit58d53262e10d9ded123ab3c776856d53acee44d4 (patch)
treef05e631e7448b1752175f33f00cbd9a595369b91
parent7faf300de1e914c298cdbf0efe3ea379f66c9f51 (diff)
downloadperl-58d53262e10d9ded123ab3c776856d53acee44d4.tar.gz
Add a pseudolayer ":locale" to the open pragma which
will get the encoding from the locale. Yet undocumented because I can't get the PerlIO :encoding(foobar) to work. p4raw-id: //depot/perl@11236
-rw-r--r--ext/Encode/Encode.pm21
-rw-r--r--lib/open.pm35
2 files changed, 47 insertions, 9 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index a274f387bf..4e55f46286 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -102,17 +102,14 @@ sub define_alias
# Allow variants of iso-8859-1 etc.
define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' );
-# This is a font issue, not an encoding issue.
-# (The currency symbol of the Latin 1 upper half is redefined
-# as the euro symbol.)
-define_alias( qr/^(.+)\@euro$/i => '"$1"' );
-
-# Solaris has this as a generic Latin-1 encoding.
-define_alias( qr/^iso_8859_1$/ => 'iso-8859-1' );
-
# At least HP-UX has these.
define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' );
+# This is a font issue, not an encoding issue.
+# (The currency symbol of the Latin 1 upper half
+# has been redefined as the euro symbol.)
+define_alias( qr/^(.+)\@euro$/i => '"$1"' );
+
# Allow latin-1 style names as well
define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i => '"iso-8859-$latin2iso_num[$1]"' );
@@ -126,9 +123,15 @@ define_alias( 'ascii' => 'US-ascii',
# At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
define_alias( qr/^ibm[-_]?(\d\d\d\d?)$/i => '"cp$1"');
-# Standardize on the dashed version.
+# Standardize on the dashed versions.
+define_alias( qr/^utf8$/i => 'utf-8' );
define_alias( qr/^koi8r$/i => 'koi8-r' );
+# TODO: the HP-UX '8' encodings: arabic8 greek8 hebrew8 roman8 turkish8
+# TODO: the Thai Encoding tis620
+# TODO: the Chinese Encoding gb18030
+# TODO: what is the Japanese 'ujis' encoding seen in some Linuxes?
+
# Map white space and _ to '-'
define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' );
diff --git a/lib/open.pm b/lib/open.pm
index 3a08b797fd..085e770162 100644
--- a/lib/open.pm
+++ b/lib/open.pm
@@ -4,6 +4,33 @@ $open::hint_bits = 0x20000;
our $VERSION = '1.01';
+my $locale_encoding;
+
+sub in_locale { $^H & $locale::hint_bits }
+
+sub _get_locale_encoding {
+ unless (defined $locale_encoding) {
+ eval { use I18N::Langinfo qw(langinfo CODESET) };
+ unless ($@) {
+ $locale_encoding = langinfo(CODESET);
+ }
+ if (not $locale_encoding && in_locale()) {
+ if ($ENV{LC_ALL} =~ /^[^.]+\.([^.]+)$/) {
+ $locale_encoding = $1;
+ } elsif ($ENV{LANG} =~ /^[^.]+\.([^.]+)$/) {
+ $locale_encoding = $1;
+ }
+ } else {
+ # Could do heuristics based on the country and language
+ # parts of LC_ALL and LANG (the parts before the dot (if any)),
+ # since we have Locale::Country and Locale::Language available.
+ # TODO: get a database of Language -> Encoding mappings
+ # (the Estonian database would be excellent!)
+ # --jhi
+ }
+ }
+}
+
sub import {
my ($class,@args) = @_;
croak("`use open' needs explicit list of disciplines") unless @args;
@@ -17,6 +44,14 @@ sub import {
my @val;
foreach my $layer (split(/\s+/,$discp)) {
$layer =~ s/^://;
+ if ($layer eq 'locale') {
+ use Encode;
+ _get_locale_encoding()
+ unless defined $locale_encoding;
+ croak "Cannot figure out an encoding to use"
+ unless defined $locale_encoding;
+ $layer = "encoding($locale_encoding)";
+ }
unless(PerlIO::Layer::->find($layer)) {
carp("Unknown discipline layer '$layer'");
}