diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-07-09 14:10:07 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-07-09 14:10:07 +0000 |
commit | 58d53262e10d9ded123ab3c776856d53acee44d4 (patch) | |
tree | f05e631e7448b1752175f33f00cbd9a595369b91 | |
parent | 7faf300de1e914c298cdbf0efe3ea379f66c9f51 (diff) | |
download | perl-58d53262e10d9ded123ab3c776856d53acee44d4.tar.gz |
Add a pseudolayer ":locale" to the open pragma which
will get the encoding from the locale. Yet undocumented
because I can't get the PerlIO :encoding(foobar) to work.
p4raw-id: //depot/perl@11236
-rw-r--r-- | ext/Encode/Encode.pm | 21 | ||||
-rw-r--r-- | lib/open.pm | 35 |
2 files changed, 47 insertions, 9 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index a274f387bf..4e55f46286 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -102,17 +102,14 @@ sub define_alias # Allow variants of iso-8859-1 etc. define_alias( qr/^iso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"' ); -# This is a font issue, not an encoding issue. -# (The currency symbol of the Latin 1 upper half is redefined -# as the euro symbol.) -define_alias( qr/^(.+)\@euro$/i => '"$1"' ); - -# Solaris has this as a generic Latin-1 encoding. -define_alias( qr/^iso_8859_1$/ => 'iso-8859-1' ); - # At least HP-UX has these. define_alias( qr/^iso8859(\d+)$/i => '"iso-8859-$1"' ); +# This is a font issue, not an encoding issue. +# (The currency symbol of the Latin 1 upper half +# has been redefined as the euro symbol.) +define_alias( qr/^(.+)\@euro$/i => '"$1"' ); + # Allow latin-1 style names as well define_alias( qr/^(?:iso[-_]?)?latin[-_]?(\d+)$/i => '"iso-8859-$latin2iso_num[$1]"' ); @@ -126,9 +123,15 @@ define_alias( 'ascii' => 'US-ascii', # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN. define_alias( qr/^ibm[-_]?(\d\d\d\d?)$/i => '"cp$1"'); -# Standardize on the dashed version. +# Standardize on the dashed versions. +define_alias( qr/^utf8$/i => 'utf-8' ); define_alias( qr/^koi8r$/i => 'koi8-r' ); +# TODO: the HP-UX '8' encodings: arabic8 greek8 hebrew8 roman8 turkish8 +# TODO: the Thai Encoding tis620 +# TODO: the Chinese Encoding gb18030 +# TODO: what is the Japanese 'ujis' encoding seen in some Linuxes? + # Map white space and _ to '-' define_alias( qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"' ); diff --git a/lib/open.pm b/lib/open.pm index 3a08b797fd..085e770162 100644 --- a/lib/open.pm +++ b/lib/open.pm @@ -4,6 +4,33 @@ $open::hint_bits = 0x20000; our $VERSION = '1.01'; +my $locale_encoding; + +sub in_locale { $^H & $locale::hint_bits } + +sub _get_locale_encoding { + unless (defined $locale_encoding) { + eval { use I18N::Langinfo qw(langinfo CODESET) }; + unless ($@) { + $locale_encoding = langinfo(CODESET); + } + if (not $locale_encoding && in_locale()) { + if ($ENV{LC_ALL} =~ /^[^.]+\.([^.]+)$/) { + $locale_encoding = $1; + } elsif ($ENV{LANG} =~ /^[^.]+\.([^.]+)$/) { + $locale_encoding = $1; + } + } else { + # Could do heuristics based on the country and language + # parts of LC_ALL and LANG (the parts before the dot (if any)), + # since we have Locale::Country and Locale::Language available. + # TODO: get a database of Language -> Encoding mappings + # (the Estonian database would be excellent!) + # --jhi + } + } +} + sub import { my ($class,@args) = @_; croak("`use open' needs explicit list of disciplines") unless @args; @@ -17,6 +44,14 @@ sub import { my @val; foreach my $layer (split(/\s+/,$discp)) { $layer =~ s/^://; + if ($layer eq 'locale') { + use Encode; + _get_locale_encoding() + unless defined $locale_encoding; + croak "Cannot figure out an encoding to use" + unless defined $locale_encoding; + $layer = "encoding($locale_encoding)"; + } unless(PerlIO::Layer::->find($layer)) { carp("Unknown discipline layer '$layer'"); } |