diff options
Diffstat (limited to 'dist/I18N-Collate/lib/I18N/Collate.pm')
-rw-r--r-- | dist/I18N-Collate/lib/I18N/Collate.pm | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/dist/I18N-Collate/lib/I18N/Collate.pm b/dist/I18N-Collate/lib/I18N/Collate.pm new file mode 100644 index 0000000000..95c9c1c35c --- /dev/null +++ b/dist/I18N-Collate/lib/I18N/Collate.pm @@ -0,0 +1,196 @@ +package I18N::Collate; + +use strict; +our $VERSION = '1.01'; + +=head1 NAME + +I18N::Collate - compare 8-bit scalar data according to the current locale + +=head1 SYNOPSIS + + use I18N::Collate; + setlocale(LC_COLLATE, 'locale-of-your-choice'); + $s1 = I18N::Collate->new("scalar_data_1"); + $s2 = I18N::Collate->new("scalar_data_2"); + +=head1 DESCRIPTION + + *** + + WARNING: starting from the Perl version 5.003_06 + the I18N::Collate interface for comparing 8-bit scalar data + according to the current locale + + HAS BEEN DEPRECATED + + That is, please do not use it anymore for any new applications + and please migrate the old applications away from it because its + functionality was integrated into the Perl core language in the + release 5.003_06. + + See the perllocale manual page for further information. + + *** + +This module provides you with objects that will collate +according to your national character set, provided that the +POSIX setlocale() function is supported on your system. + +You can compare $s1 and $s2 above with + + $s1 le $s2 + +to extract the data itself, you'll need a dereference: $$s1 + +This module uses POSIX::setlocale(). The basic collation conversion is +done by strxfrm() which terminates at NUL characters being a decent C +routine. collate_xfrm() handles embedded NUL characters gracefully. + +The available locales depend on your operating system; try whether +C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the +direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or +C<ls /usr/lib/locale>. Not all the locales that your vendor supports +are necessarily installed: please consult your operating system's +documentation and possibly your local system administration. The +locale names are probably something like C<xx_XX.(ISO)?8859-N> or +C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH) +variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western +European character set. + +=cut + +# I18N::Collate.pm +# +# Author: Jarkko Hietaniemi <F<jhi@iki.fi>> +# Helsinki University of Technology, Finland +# +# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood +# overloading magic much deeper than I and told +# how to cut the size of this code by more than half. +# (my first version did overload all of lt gt eq le ge cmp) +# +# Purpose: compare 8-bit scalar data according to the current locale +# +# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() +# +# Exports: setlocale 1) +# collate_xfrm 2) +# +# Overloads: cmp # 3) +# +# Usage: use I18N::Collate; +# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) +# $s1 = I18N::Collate->("scalar_data_1"); +# $s2 = I18N::Collate->("scalar_data_2"); +# +# now you can compare $s1 and $s2: $s1 le $s2 +# to extract the data itself, you need to deref: $$s1 +# +# Notes: +# 1) this uses POSIX::setlocale +# 2) the basic collation conversion is done by strxfrm() which +# terminates at NUL characters being a decent C routine. +# collate_xfrm handles embedded NUL characters gracefully. +# 3) due to cmp and overload magic, lt le eq ge gt work also +# 4) the available locales depend on your operating system; +# try whether "locale -a" shows them or man pages for +# "locale" or "nlsinfo" work or the more direct +# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". +# Not all the locales that your vendor supports +# are necessarily installed: please consult your +# operating system's documentation. +# The locale names are probably something like +# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', +# for example 'fr_CH.ISO8859-1' is the Swiss (CH) +# variant of French (fr), ISO Latin (8859) 1 (-1) +# which is the Western European character set. +# +# Updated: 19961005 +# +# --- + +use POSIX qw(strxfrm LC_COLLATE); +use warnings::register; + +require Exporter; + +our @ISA = qw(Exporter); +our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); +our @EXPORT_OK = qw(); + +use overload qw( +fallback 1 +cmp collate_cmp +); + +our($LOCALE, $C); + +our $please_use_I18N_Collate_even_if_deprecated = 0; +sub new { + my $new = $_[1]; + + if (warnings::enabled() && $] >= 5.003_06) { + unless ($please_use_I18N_Collate_even_if_deprecated) { + warnings::warn <<___EOD___; +*** + + WARNING: starting from the Perl version 5.003_06 + the I18N::Collate interface for comparing 8-bit scalar data + according to the current locale + + HAS BEEN DEPRECATED + + That is, please do not use it anymore for any new applications + and please migrate the old applications away from it because its + functionality was integrated into the Perl core language in the + release 5.003_06. + + See the perllocale manual page for further information. + +*** +___EOD___ + $please_use_I18N_Collate_even_if_deprecated++; + } + } + + bless \$new; +} + +sub setlocale { + my ($category, $locale) = @_[0,1]; + + POSIX::setlocale($category, $locale) if (defined $category); + # the current $LOCALE + $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; +} + +sub C { + my $s = ${$_[0]}; + + $C->{$LOCALE}->{$s} = collate_xfrm($s) + unless (defined $C->{$LOCALE}->{$s}); # cache when met + + $C->{$LOCALE}->{$s}; +} + +sub collate_xfrm { + my $s = $_[0]; + my $x = ''; + + for (split(/(\000+)/, $s)) { + $x .= (/^\000/) ? $_ : strxfrm("$_\000"); + } + + $x; +} + +sub collate_cmp { + &C($_[0]) cmp &C($_[1]); +} + +# init $LOCALE + +&I18N::Collate::setlocale(); + +1; # keep require happy |