diff options
author | Larry Wall <lwall@netlabs.com> | 1994-10-17 23:00:00 +0000 |
---|---|---|
committer | Larry Wall <lwall@netlabs.com> | 1994-10-17 23:00:00 +0000 |
commit | a0d0e21ea6ea90a22318550944fe6cb09ae10cda (patch) | |
tree | faca1018149b736b1142f487e44d1ff2de5cc1fa /lib/I18N | |
parent | 85e6fe838fb25b257a1b363debf8691c0992ef71 (diff) | |
download | perl-a0d0e21ea6ea90a22318550944fe6cb09ae10cda.tar.gz |
perl 5.000perl-5.000
[editor's note: this commit combines approximate 4 months of furious
releases of Andy Dougherty and Larry Wall - see pod/perlhist.pod for
details. Andy notes that;
Alas neither my "Irwin AccuTrack" nor my DC 600A quarter-inch cartridge
backup tapes from that era seem to be readable anymore. I guess 13 years
exceeds the shelf life for that backup technology :-(.
]
Diffstat (limited to 'lib/I18N')
-rw-r--r-- | lib/I18N/Collate.pm | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/lib/I18N/Collate.pm b/lib/I18N/Collate.pm new file mode 100644 index 0000000000..52c78abe83 --- /dev/null +++ b/lib/I18N/Collate.pm @@ -0,0 +1,97 @@ +package I18N::Collate; + +# Collate.pm +# +# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi> +# Helsinki University of Technology, Finland +# +# Acks: Guy Decoux <decoux@moulon.inra.fr> understood +# overloading magic much deeper than I and told +# how to cut the size of this code by more than half. +# (my first version did overload all of lt gt eq le ge cmp) +# +# Purpose: compare 8-bit scalar data according to the current locale +# +# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() +# +# Exports: setlocale 1) +# collate_xfrm 2) +# +# Overloads: cmp # 3) +# +# Usage: use Collate; +# setlocale(&LC_COLLATE, 'locale-of-your-choice'); # 4) +# $s1 = new Collate "scalar_data_1"; +# $s2 = new Collate "scalar_data_2"; +# +# now you can compare $s1 and $s2: $s1 le $s2 +# to extract the data itself, you need to deref: $$s1 +# +# Notes: +# 1) this uses POSIX::setlocale +# 2) the basic collation conversion is done by strxfrm() which +# terminates at NUL characters being a decent C routine. +# collate_xfrm handles embedded NUL characters gracefully. +# 3) due to cmp and overload magic, lt le eq ge gt work also +# 4) the available locales depend on your operating system; +# try whether "locale -a" shows them or the more direct +# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". +# The locale names are probably something like +# 'xx_XX.(ISO)?8859-N'. +# +# Updated: 19940913 1341 GMT +# +# --- + +use POSIX qw(strxfrm LC_COLLATE); + +require Exporter; + +@ISA = qw(Exporter); +@EXPORT = qw(collate_xfrm setlocale LC_COLLATE); +@EXPORT_OK = qw(); + +%OVERLOAD = qw( +fallback 1 +cmp collate_cmp +); + +sub new { my $new = $_[1]; bless \$new } + +sub setlocale { + my ($category, $locale) = @_[0,1]; + + POSIX::setlocale($category, $locale) if (defined $category); + # the current $LOCALE + $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; +} + +sub C { + my $s = ${$_[0]}; + + $C->{$LOCALE}->{$s} = collate_xfrm($s) + unless (defined $C->{$LOCALE}->{$s}); # cache when met + + $C->{$LOCALE}->{$s}; +} + +sub collate_xfrm { + my $s = $_[0]; + my $x = ''; + + for (split(/(\000+)/, $s)) { + $x .= (/^\000/) ? $_ : strxfrm("$_\000"); + } + + $x; +} + +sub collate_cmp { + &C($_[0]) cmp &C($_[1]); +} + +# init $LOCALE + +&I18N::Collate::setlocale(); + +1; # keep require happy |