summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2001-09-02 12:41:12 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2001-09-02 12:41:12 +0000
commitf499c3861b7c35f971fe2027563597c0f31f3f45 (patch)
tree6beec7481907351a689a4fedb015f371ed945d69
parent9731f9ce05d1c7e58d4cec595b6014e465a43d2d (diff)
downloadperl-f499c3861b7c35f971fe2027563597c0f31f3f45.tar.gz
Special casing had become a little bit more complex in Unicode 3.1.1.
p4raw-id: //depot/perl@11832
-rw-r--r--lib/Unicode/UCD.pm54
-rw-r--r--lib/Unicode/UCD.t12
2 files changed, 54 insertions, 12 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index d4525ccf2b..841c373f3e 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -681,6 +681,11 @@ list overrides the normal behavior if all of the listed conditions are
true. Case distinctions in the condition list are not significant.
Conditions preceded by "NON_" represent the negation of the condition
+Note that when there are multiple special casing definitions for a
+single code point because of different locales, the value returned by
+casespec() is a hash reference which has the locales as the keys and
+hash references as described above as the values.
+
A I<locale> is defined as a 2-letter ISO 639 language code, possibly
followed by a "_" and a 2-letter ISO 3166 country code (possibly followed
by a "_" and a variant code). You can find the lists of those codes,
@@ -705,12 +710,49 @@ sub _casespec {
if (openunicode(\$CASESPECFH, "SpecCase.txt")) {
while (<$CASESPECFH>) {
if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) {
- my $code = hex($1);
- $CASESPEC{$code} = { code => $1,
- lower => $2,
- title => $3,
- upper => $4,
- condition => $5 };
+ my ($hexcode, $lower, $title, $upper, $condition) =
+ ($1, $2, $3, $4, $5);
+ my $code = hex($hexcode);
+ if (exists $CASESPEC{$code}) {
+ if (exists $CASESPEC{$code}->{code}) {
+ my ($oldlower,
+ $oldtitle,
+ $oldupper,
+ $oldcondition) =
+ @{$CASESPEC{$code}}{qw(lower
+ title
+ upper
+ condition)};
+ my ($oldlocale) =
+ ($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/);
+ if (defined $oldlocale) {
+ delete $CASESPEC{$code};
+ $CASESPEC{$code}->{$oldlocale} =
+ { code => $hexcode,
+ lower => $oldlower,
+ title => $oldtitle,
+ upper => $oldupper,
+ condition => $oldcondition };
+ } else {
+ warn __PACKAGE__, ": SpecCase.txt:", $., ": No oldlocale for 0x$hexcode\n"
+ }
+ }
+ my ($locale) =
+ ($condition =~ /^([a-z][a-z](?:_\S+)?)/);
+ $CASESPEC{$code}->{$locale} =
+ { code => $hexcode,
+ lower => $lower,
+ title => $title,
+ upper => $upper,
+ condition => $condition };
+ } else {
+ $CASESPEC{$code} =
+ { code => $hexcode,
+ lower => $lower,
+ title => $title,
+ upper => $upper,
+ condition => $condition };
+ }
}
}
close($CASESPECFH);
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 67e99d0b9d..7536e728e4 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -223,7 +223,7 @@ ok( charinrange($ranges, "13a0"));
ok( charinrange($ranges, "13f4"));
ok(!charinrange($ranges, "13f5"));
-ok(Unicode::UCD::UnicodeVersion, 3.1);
+ok(Unicode::UCD::UnicodeVersion, '3.1.1');
use Unicode::UCD qw(compexcl);
@@ -264,8 +264,8 @@ ok($casespec->{code} eq '00DF' &&
$casespec = casespec(0x307);
-ok($casespec->{code} eq '0307' &&
- $casespec->{lower} eq '0307' &&
- $casespec->{title} eq '' &&
- $casespec->{upper} eq '' &&
- $casespec->{condition} eq 'lt AFTER_i');
+ok($casespec->{az}->{code} eq '0307' &&
+ $casespec->{az}->{lower} eq '' &&
+ $casespec->{az}->{title} eq '0307' &&
+ $casespec->{az}->{upper} eq '0307' &&
+ $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');