summaryrefslogtreecommitdiff
path: root/lib/I18N/Collate.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/I18N/Collate.pm')
-rw-r--r--lib/I18N/Collate.pm97
1 files changed, 97 insertions, 0 deletions
diff --git a/lib/I18N/Collate.pm b/lib/I18N/Collate.pm
new file mode 100644
index 0000000000..52c78abe83
--- /dev/null
+++ b/lib/I18N/Collate.pm
@@ -0,0 +1,97 @@
+package I18N::Collate;
+
+# Collate.pm
+#
+# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
+# Helsinki University of Technology, Finland
+#
+# Acks: Guy Decoux <decoux@moulon.inra.fr> understood
+# overloading magic much deeper than I and told
+# how to cut the size of this code by more than half.
+# (my first version did overload all of lt gt eq le ge cmp)
+#
+# Purpose: compare 8-bit scalar data according to the current locale
+#
+# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
+#
+# Exports: setlocale 1)
+# collate_xfrm 2)
+#
+# Overloads: cmp # 3)
+#
+# Usage: use Collate;
+# setlocale(&LC_COLLATE, 'locale-of-your-choice'); # 4)
+# $s1 = new Collate "scalar_data_1";
+# $s2 = new Collate "scalar_data_2";
+#
+# now you can compare $s1 and $s2: $s1 le $s2
+# to extract the data itself, you need to deref: $$s1
+#
+# Notes:
+# 1) this uses POSIX::setlocale
+# 2) the basic collation conversion is done by strxfrm() which
+# terminates at NUL characters being a decent C routine.
+# collate_xfrm handles embedded NUL characters gracefully.
+# 3) due to cmp and overload magic, lt le eq ge gt work also
+# 4) the available locales depend on your operating system;
+# try whether "locale -a" shows them or the more direct
+# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
+# The locale names are probably something like
+# 'xx_XX.(ISO)?8859-N'.
+#
+# Updated: 19940913 1341 GMT
+#
+# ---
+
+use POSIX qw(strxfrm LC_COLLATE);
+
+require Exporter;
+
+@ISA = qw(Exporter);
+@EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
+@EXPORT_OK = qw();
+
+%OVERLOAD = qw(
+fallback 1
+cmp collate_cmp
+);
+
+sub new { my $new = $_[1]; bless \$new }
+
+sub setlocale {
+ my ($category, $locale) = @_[0,1];
+
+ POSIX::setlocale($category, $locale) if (defined $category);
+ # the current $LOCALE
+ $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
+}
+
+sub C {
+ my $s = ${$_[0]};
+
+ $C->{$LOCALE}->{$s} = collate_xfrm($s)
+ unless (defined $C->{$LOCALE}->{$s}); # cache when met
+
+ $C->{$LOCALE}->{$s};
+}
+
+sub collate_xfrm {
+ my $s = $_[0];
+ my $x = '';
+
+ for (split(/(\000+)/, $s)) {
+ $x .= (/^\000/) ? $_ : strxfrm("$_\000");
+ }
+
+ $x;
+}
+
+sub collate_cmp {
+ &C($_[0]) cmp &C($_[1]);
+}
+
+# init $LOCALE
+
+&I18N::Collate::setlocale();
+
+1; # keep require happy