diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-02 12:41:12 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-09-02 12:41:12 +0000 |
commit | f499c3861b7c35f971fe2027563597c0f31f3f45 (patch) | |
tree | 6beec7481907351a689a4fedb015f371ed945d69 | |
parent | 9731f9ce05d1c7e58d4cec595b6014e465a43d2d (diff) | |
download | perl-f499c3861b7c35f971fe2027563597c0f31f3f45.tar.gz |
Special casing became a little bit more complex in Unicode 3.1.1.
p4raw-id: //depot/perl@11832
-rw-r--r-- | lib/Unicode/UCD.pm | 54 | ||||
-rw-r--r-- | lib/Unicode/UCD.t | 12 |
2 files changed, 54 insertions, 12 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index d4525ccf2b..841c373f3e 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -681,6 +681,11 @@ list overrides the normal behavior if all of the listed conditions are true. Case distinctions in the condition list are not significant. Conditions preceded by "NON_" represent the negation of the condition +Note that when there are multiple case folding definitions for a +single code point because of different locales, the value returned by +casespec() is a hash reference which has the locales as the keys and +hash references as described above as the values. + A I<locale> is defined as a 2-letter ISO 3166 country code, possibly followed by a "_" and a 2-letter ISO language code (possibly followed by a "_" and a variant code). You can find the lists of those codes, @@ -705,12 +710,49 @@ sub _casespec { if (openunicode(\$CASESPECFH, "SpecCase.txt")) { while (<$CASESPECFH>) { if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) { - my $code = hex($1); - $CASESPEC{$code} = { code => $1, - lower => $2, - title => $3, - upper => $4, - condition => $5 }; + my ($hexcode, $lower, $title, $upper, $condition) = + ($1, $2, $3, $4, $5); + my $code = hex($hexcode); + if (exists $CASESPEC{$code}) { + if (exists $CASESPEC{$code}->{code}) { + my ($oldlower, + $oldtitle, + $oldupper, + $oldcondition) = + @{$CASESPEC{$code}}{qw(lower + title + upper + condition)}; + my ($oldlocale) = + ($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/); + if (defined $oldlocale) { + delete $CASESPEC{$code}; + $CASESPEC{$code}->{$oldlocale} = + { code => $hexcode, + lower => $oldlower, + title => $oldtitle, + upper => $oldupper, + condition => $oldcondition }; + } else { + warn __PACKAGE__, ": SpecCase.txt:", $., ": No oldlocale for 0x$hexcode\n" + } + } + my ($locale) = + ($condition =~ /^([a-z][a-z](?:_\S+)?)/); + $CASESPEC{$code}->{$locale} = + { code => $hexcode, + lower => 
$lower, + title => $title, + upper => $upper, + condition => $condition }; + } else { + $CASESPEC{$code} = + { code => $hexcode, + lower => $lower, + title => $title, + upper => $upper, + condition => $condition }; + } } } close($CASESPECFH); diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 67e99d0b9d..7536e728e4 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -223,7 +223,7 @@ ok( charinrange($ranges, "13a0")); ok( charinrange($ranges, "13f4")); ok(!charinrange($ranges, "13f5")); -ok(Unicode::UCD::UnicodeVersion, 3.1); +ok(Unicode::UCD::UnicodeVersion, '3.1.1'); use Unicode::UCD qw(compexcl); @@ -264,8 +264,8 @@ ok($casespec->{code} eq '00DF' && $casespec = casespec(0x307); -ok($casespec->{code} eq '0307' && - $casespec->{lower} eq '0307' && - $casespec->{title} eq '' && - $casespec->{upper} eq '' && - $casespec->{condition} eq 'lt AFTER_i'); +ok($casespec->{az}->{code} eq '0307' && + $casespec->{az}->{lower} eq '' && + $casespec->{az}->{title} eq '0307' && + $casespec->{az}->{upper} eq '0307' && + $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE'); |