blob: 52c78abe83dac83e9f75869a0d331675f350519a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
package I18N::Collate;
# Collate.pm
#
# Author: Jarkko Hietaniemi <Jarkko.Hietaniemi@hut.fi>
# Helsinki University of Technology, Finland
#
# Acks: Guy Decoux <decoux@moulon.inra.fr> understood
# overloading magic much deeper than I and told
# how to cut the size of this code by more than half.
# (my first version did overload all of lt gt eq le ge cmp)
#
# Purpose: compare 8-bit scalar data according to the current locale
#
# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm()
#
# Exports: setlocale 1)
# collate_xfrm 2)
#
# Overloads: cmp # 3)
#
# Usage: use Collate;
# setlocale(&LC_COLLATE, 'locale-of-your-choice'); # 4)
# $s1 = new Collate "scalar_data_1";
# $s2 = new Collate "scalar_data_2";
#
# now you can compare $s1 and $s2: $s1 le $s2
# to extract the data itself, you need to deref: $$s1
#
# Notes:
# 1) this uses POSIX::setlocale
# 2) the basic collation conversion is done by strxfrm() which
# terminates at NUL characters being a decent C routine.
# collate_xfrm handles embedded NUL characters gracefully.
# 3) due to cmp and overload magic, lt le eq ge gt work also
# 4) the available locales depend on your operating system;
# try whether "locale -a" shows them or the more direct
# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
# The locale names are probably something like
# 'xx_XX.(ISO)?8859-N'.
#
# Updated: 19940913 1341 GMT
#
# ---
use POSIX qw(strxfrm LC_COLLATE);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
@EXPORT_OK = qw();
%OVERLOAD = qw(
fallback 1
cmp collate_cmp
);
sub new { my $new = $_[1]; bless \$new }
sub setlocale {
my ($category, $locale) = @_[0,1];
POSIX::setlocale($category, $locale) if (defined $category);
# the current $LOCALE
$LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
}
sub C {
my $s = ${$_[0]};
$C->{$LOCALE}->{$s} = collate_xfrm($s)
unless (defined $C->{$LOCALE}->{$s}); # cache when met
$C->{$LOCALE}->{$s};
}
sub collate_xfrm {
my $s = $_[0];
my $x = '';
for (split(/(\000+)/, $s)) {
$x .= (/^\000/) ? $_ : strxfrm("$_\000");
}
$x;
}
sub collate_cmp {
&C($_[0]) cmp &C($_[1]);
}
# init $LOCALE
&I18N::Collate::setlocale();
1; # keep require happy
|