summary | refs | log | tree | commit | diff
path: root/lib/Unicode
diff options
context:
space:
mode:
author Steve Peters <steve@fisharerojo.org> 2005-10-14 01:20:21 +0000
committer Steve Peters <steve@fisharerojo.org> 2005-10-14 01:20:21 +0000
commit 6d24ed100318c05343c5524872a98bee8e2fcca4 (patch)
tree 26d8b59b4351f0787bb69a38b6acd68f75199b3f /lib/Unicode
parent 417cd4a80014be11be785b60728258129d31e986 (diff)
download perl-6d24ed100318c05343c5524872a98bee8e2fcca4.tar.gz
Upgrade to Unicode-Collate-0.52
p4raw-id: //depot/perl@25756
Diffstat (limited to 'lib/Unicode')
-rw-r--r-- lib/Unicode/Collate.pm 47
-rw-r--r-- lib/Unicode/Collate/Changes 4
-rw-r--r-- lib/Unicode/Collate/README 8
-rw-r--r-- lib/Unicode/Collate/t/test.t 36
4 files changed, 67 insertions, 28 deletions
diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm
index e6196e0293..cd9b8e70e6 100644
--- a/lib/Unicode/Collate.pm
+++ b/lib/Unicode/Collate.pm
@@ -14,7 +14,7 @@ use File::Spec;
no warnings 'utf8';
-our $VERSION = '0.51';
+our $VERSION = '0.52';
our $PACKAGE = __PACKAGE__;
my @Path = qw(Unicode Collate);
@@ -305,29 +305,30 @@ sub read_table {
croak("$PACKAGE: Can't locate $f in \@INC (\@INC contains: @INC)");
}
- while (<$fh>) {
- next if /^\s*#/;
- unless (s/^\s*\@//) {
- $self->parseEntry($_);
+ while (my $line = <$fh>) {
+ next if $line =~ /^\s*#/;
+ unless ($line =~ s/^\s*\@//) {
+ $self->parseEntry($line);
next;
}
- if (/^version\s*(\S*)/) {
+ # matched ^\s*\@
+ if ($line =~ /^version\s*(\S*)/) {
$self->{versionTable} ||= $1;
}
- elsif (/^variable\s+(\S*)/) { # since UTS #10-9
+ elsif ($line =~ /^variable\s+(\S*)/) { # since UTS #10-9
$self->{variableTable} ||= $1;
}
- elsif (/^alternate\s+(\S*)/) { # till UTS #10-8
+ elsif ($line =~ /^alternate\s+(\S*)/) { # till UTS #10-8
$self->{alternateTable} ||= $1;
}
- elsif (/^backwards\s+(\S*)/) {
+ elsif ($line =~ /^backwards\s+(\S*)/) {
push @{ $self->{backwardsTable} }, $1;
}
- elsif (/^forwards\s+(\S*)/) { # parhaps no use
+ elsif ($line =~ /^forwards\s+(\S*)/) { # perhaps no use
push @{ $self->{forwardsTable} }, $1;
}
- elsif (/^rearrange\s+(.*)/) { # (\S*) is NG
+ elsif ($line =~ /^rearrange\s+(.*)/) { # (\S*) is NG
push @{ $self->{rearrangeTable} }, _getHexArray($1);
}
}
@@ -706,17 +707,17 @@ sub getSortKey
# modification of tertiary weights
if ($self->{upper_before_lower}) {
- foreach (@{ $ret[2] }) {
- if (0x8 <= $_ && $_ <= 0xC) { $_ -= 6 } # lower
- elsif (0x2 <= $_ && $_ <= 0x6) { $_ += 6 } # upper
- elsif ($_ == 0x1C) { $_ += 1 } # square upper
- elsif ($_ == 0x1D) { $_ -= 1 } # square lower
+ foreach my $w (@{ $ret[2] }) {
+ if (0x8 <= $w && $w <= 0xC) { $w -= 6 } # lower
+ elsif (0x2 <= $w && $w <= 0x6) { $w += 6 } # upper
+ elsif ($w == 0x1C) { $w += 1 } # square upper
+ elsif ($w == 0x1D) { $w -= 1 } # square lower
}
}
if ($self->{katakana_before_hiragana}) {
- foreach (@{ $ret[2] }) {
- if (0x0F <= $_ && $_ <= 0x13) { $_ -= 2 } # katakana
- elsif (0x0D <= $_ && $_ <= 0x0E) { $_ += 5 } # hiragana
+ foreach my $w (@{ $ret[2] }) {
+ if (0x0F <= $w && $w <= 0x13) { $w -= 2 } # katakana
+ elsif (0x0D <= $w && $w <= 0x0E) { $w += 5 } # hiragana
}
}
@@ -1790,8 +1791,8 @@ to use this module easily, it is recommended to install a table file
in the UCA format, by copying it under the directory
<a place in @INC>/Unicode/Collate.
-The most preferable one is "The Default Unicode Collation Element Table",
-available from the Unicode Consortium's website:
+The most preferable one is "The Default Unicode Collation Element Table"
+(aka DUCET), available from the Unicode Consortium's website:
http://www.unicode.org/Public/UCA/
@@ -1841,9 +1842,9 @@ This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
The file Unicode/Collate/allkeys.txt was copied directly
-from http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt (aka DUCET).
+from L<http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt>.
This file is Copyright (c) 1991-2005 Unicode, Inc. All rights reserved.
-Distributed under the Terms of Use in http://www.unicode.org/copyright.html
+Distributed under the Terms of Use in L<http://www.unicode.org/copyright.html>.
=head1 SEE ALSO
diff --git a/lib/Unicode/Collate/Changes b/lib/Unicode/Collate/Changes
index 082f3d54dc..120368cb49 100644
--- a/lib/Unicode/Collate/Changes
+++ b/lib/Unicode/Collate/Changes
@@ -1,5 +1,9 @@
Revision history for Perl module Unicode::Collate.
+0.52 Thu Oct 13 21:51:09 2005
+ - The Unicode::Collate->new method no longer clobbers the user's $_.
+ (thanks to Jon Warbrick for bug report)
+
0.51 Sun May 29 20:21:19 2005
- Added the latest DUCET (for Unicode 4.1.0) as Collate/allkeys.txt,
which is not required to test this module.
diff --git a/lib/Unicode/Collate/README b/lib/Unicode/Collate/README
index 05c8f3744f..b2bc7f9666 100644
--- a/lib/Unicode/Collate/README
+++ b/lib/Unicode/Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.51
+Unicode/Collate version 0.52
===============================
NAME
@@ -34,8 +34,8 @@ You can install such a table file by adding it
to "Collate" directory (where "keys.txt" is placed) in this distribution
before executing Makefile.PL.
-The most preferable one is "The Default Unicode Collation Element Table",
-available from the Unicode Consortium's website:
+The most preferable one is "The Default Unicode Collation Element Table"
+(aka DUCET), available from the Unicode Consortium's website:
http://www.unicode.org/Public/UCA/
@@ -92,6 +92,6 @@ This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
The file Unicode/Collate/allkeys.txt was copied directly
-from http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt (aka DUCET).
+from http://www.unicode.org/Public/UCA/4.1.0/allkeys.txt
This file is Copyright (c) 1991-2005 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html
diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t
index a5337a014f..698802e9be 100644
--- a/lib/Unicode/Collate/t/test.t
+++ b/lib/Unicode/Collate/t/test.t
@@ -12,7 +12,7 @@ BEGIN {
}
use Test;
-BEGIN { plan tests => 107 };
+BEGIN { plan tests => 113 };
use strict;
use warnings;
@@ -361,5 +361,39 @@ $Collator->change(level => 4);
ok($Collator->gt("!\x{300}", ""));
ok($Collator->eq("!\x{300}", "!"));
+##### 108..113
+
+$_ = 'Foo';
+
+my $c = Unicode::Collate->new(
+ table => 'keys.txt',
+ normalization => undef,
+ upper_before_lower => 1,
+);
+
+ok($_, 'Foo'); # fixed in v0.52: new() no longer clobbers $_
+
+my($temp, @temp); # Not the result but the side effect matters.
+
+$_ = 'Foo';
+$temp = $c->getSortKey("abc");
+ok($_, 'Foo');
+
+$_ = 'Foo';
+$temp = $c->viewSortKey("abc");
+ok($_, 'Foo');
+
+$_ = 'Foo';
+@temp = $c->sort("abc", "xyz", "def");
+ok($_, 'Foo');
+
+$_ = 'Foo';
+@temp = $c->index("perl5", "RL");
+ok($_, 'Foo');
+
+$_ = 'Foo';
+@temp = $c->index("perl5", "LR");
+ok($_, 'Foo');
+
#####