summaryrefslogtreecommitdiff
path: root/dist/I18N-Collate/lib/I18N/Collate.pm
diff options
context:
space:
mode:
Diffstat (limited to 'dist/I18N-Collate/lib/I18N/Collate.pm')
-rw-r--r--dist/I18N-Collate/lib/I18N/Collate.pm196
1 files changed, 196 insertions, 0 deletions
diff --git a/dist/I18N-Collate/lib/I18N/Collate.pm b/dist/I18N-Collate/lib/I18N/Collate.pm
new file mode 100644
index 0000000000..95c9c1c35c
--- /dev/null
+++ b/dist/I18N-Collate/lib/I18N/Collate.pm
@@ -0,0 +1,196 @@
+package I18N::Collate;
+
+use strict;
+our $VERSION = '1.01';
+
+=head1 NAME
+
+I18N::Collate - compare 8-bit scalar data according to the current locale
+
+=head1 SYNOPSIS
+
+ use I18N::Collate;
+ setlocale(LC_COLLATE, 'locale-of-your-choice');
+ $s1 = I18N::Collate->new("scalar_data_1");
+ $s2 = I18N::Collate->new("scalar_data_2");
+
+=head1 DESCRIPTION
+
+ ***
+
+ WARNING: starting from the Perl version 5.003_06
+ the I18N::Collate interface for comparing 8-bit scalar data
+ according to the current locale
+
+ HAS BEEN DEPRECATED
+
+ That is, please do not use it anymore for any new applications
+ and please migrate the old applications away from it because its
+ functionality was integrated into the Perl core language in the
+ release 5.003_06.
+
+ See the perllocale manual page for further information.
+
+ ***
+
+This module provides you with objects that will collate
+according to your national character set, provided that the
+POSIX setlocale() function is supported on your system.
+
+You can compare $s1 and $s2 above with
+
+ $s1 le $s2
+
+to extract the data itself, you'll need a dereference: $$s1
+
+This module uses POSIX::setlocale(). The basic collation conversion is
+done by strxfrm() which terminates at NUL characters being a decent C
+routine. collate_xfrm() handles embedded NUL characters gracefully.
+
+The available locales depend on your operating system; try whether
+C<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
+direct approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
+C<ls /usr/lib/locale>. Not all the locales that your vendor supports
+are necessarily installed: please consult your operating system's
+documentation and possibly your local system administration. The
+locale names are probably something like C<xx_XX.(ISO)?8859-N> or
+C<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
+variant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
+European character set.
+
+=cut
+
+# I18N::Collate.pm
+#
+# Author: Jarkko Hietaniemi <F<jhi@iki.fi>>
+# Helsinki University of Technology, Finland
+#
+# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood
+# overloading magic much deeper than I and told
+# how to cut the size of this code by more than half.
+# (my first version did overload all of lt gt eq le ge cmp)
+#
+# Purpose: compare 8-bit scalar data according to the current locale
+#
+# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
+#
+# Exports: setlocale 1)
+# collate_xfrm 2)
+#
+# Overloads: cmp # 3)
+#
+# Usage: use I18N::Collate;
+# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
+# $s1 = I18N::Collate->("scalar_data_1");
+# $s2 = I18N::Collate->("scalar_data_2");
+#
+# now you can compare $s1 and $s2: $s1 le $s2
+# to extract the data itself, you need to deref: $$s1
+#
+# Notes:
+# 1) this uses POSIX::setlocale
+# 2) the basic collation conversion is done by strxfrm() which
+# terminates at NUL characters being a decent C routine.
+# collate_xfrm handles embedded NUL characters gracefully.
+# 3) due to cmp and overload magic, lt le eq ge gt work also
+# 4) the available locales depend on your operating system;
+# try whether "locale -a" shows them or man pages for
+# "locale" or "nlsinfo" work or the more direct
+# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
+# Not all the locales that your vendor supports
+# are necessarily installed: please consult your
+# operating system's documentation.
+# The locale names are probably something like
+# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
+# for example 'fr_CH.ISO8859-1' is the Swiss (CH)
+# variant of French (fr), ISO Latin (8859) 1 (-1)
+# which is the Western European character set.
+#
+# Updated: 19961005
+#
+# ---
+
+use POSIX qw(strxfrm LC_COLLATE);
+use warnings::register;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+our @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
+our @EXPORT_OK = qw();
+
+use overload qw(
+fallback 1
+cmp collate_cmp
+);
+
+our($LOCALE, $C);
+
+our $please_use_I18N_Collate_even_if_deprecated = 0;
+sub new {
+ my $new = $_[1];
+
+ if (warnings::enabled() && $] >= 5.003_06) {
+ unless ($please_use_I18N_Collate_even_if_deprecated) {
+ warnings::warn <<___EOD___;
+***
+
+ WARNING: starting from the Perl version 5.003_06
+ the I18N::Collate interface for comparing 8-bit scalar data
+ according to the current locale
+
+ HAS BEEN DEPRECATED
+
+ That is, please do not use it anymore for any new applications
+ and please migrate the old applications away from it because its
+ functionality was integrated into the Perl core language in the
+ release 5.003_06.
+
+ See the perllocale manual page for further information.
+
+***
+___EOD___
+ $please_use_I18N_Collate_even_if_deprecated++;
+ }
+ }
+
+ bless \$new;
+}
+
+sub setlocale {
+ my ($category, $locale) = @_[0,1];
+
+ POSIX::setlocale($category, $locale) if (defined $category);
+ # the current $LOCALE
+ $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
+}
+
+sub C {
+ my $s = ${$_[0]};
+
+ $C->{$LOCALE}->{$s} = collate_xfrm($s)
+ unless (defined $C->{$LOCALE}->{$s}); # cache when met
+
+ $C->{$LOCALE}->{$s};
+}
+
+sub collate_xfrm {
+ my $s = $_[0];
+ my $x = '';
+
+ for (split(/(\000+)/, $s)) {
+ $x .= (/^\000/) ? $_ : strxfrm("$_\000");
+ }
+
+ $x;
+}
+
+sub collate_cmp {
+ &C($_[0]) cmp &C($_[1]);
+}
+
+# init $LOCALE
+
+&I18N::Collate::setlocale();
+
+1; # keep require happy