Special casing has become a little bit more complex in Unicode 3.1.1.

p4raw-id: //depot/perl@11832
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 12:41:12 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-09-02 12:41:12 +0000
commit: f499c3861b7c35f971fe2027563597c0f31f3f45 (patch)
tree: 6beec7481907351a689a4fedb015f371ed945d69
parent: 9731f9ce05d1c7e58d4cec595b6014e465a43d2d (diff)
download: perl-f499c3861b7c35f971fe2027563597c0f31f3f45.tar.gz
2 files changed, 54 insertions, 12 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index d4525ccf2b..841c373f3e 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -681,6 +681,11 @@ list overrides the normal behavior if all of the listed conditions are
 true.  Case distinctions in the condition list are not significant.
 Conditions preceded by "NON_" represent the negation of the condition
 
+Note that when there are multiple special casing definitions for a
+single code point because of different locales, the value returned by
+casespec() is a hash reference which has the locales as the keys and
+hash references as described above as the values.
+
 A I<locale> is defined as a 2-letter ISO 3166 country code, possibly
 followed by a "_" and a 2-letter ISO language code (possibly followed
 by a "_" and a variant code).  You can find the lists of those codes,
@@ -705,12 +710,49 @@ sub _casespec {
 	if (openunicode(\$CASESPECFH, "SpecCase.txt")) {
 	    while (<$CASESPECFH>) {
 		if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) {
-		    my $code = hex($1);
-		    $CASESPEC{$code} = { code      => $1,
-					 lower     => $2,
-					 title     => $3,
-					 upper     => $4,
-					 condition => $5 };
+		    my ($hexcode, $lower, $title, $upper, $condition) =
+			($1, $2, $3, $4, $5);
+		    my $code = hex($hexcode);
+		    if (exists $CASESPEC{$code}) {
+			if (exists $CASESPEC{$code}->{code}) {
+			    my ($oldlower,
+				$oldtitle,
+				$oldupper,
+				$oldcondition) =
+				    @{$CASESPEC{$code}}{qw(lower
+							   title
+							   upper
+							   condition)};
+			    my ($oldlocale) =
+				($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/);
+			    if (defined $oldlocale) {
+				delete $CASESPEC{$code};
+				$CASESPEC{$code}->{$oldlocale} =
+				{ code      => $hexcode,
+				  lower     => $oldlower,
+				  title     => $oldtitle,
+				  upper     => $oldupper,
+				  condition => $oldcondition };
+			    } else {
+				warn __PACKAGE__, ": SpecCase.txt:", $., ": No oldlocale for 0x$hexcode\n"
+			    }
+			}
+			my ($locale) =
+			    ($condition =~ /^([a-z][a-z](?:_\S+)?)/);
+			$CASESPEC{$code}->{$locale} =
+			{ code      => $hexcode,
+			  lower     => $lower,
+			  title     => $title,
+			  upper     => $upper,
+			  condition => $condition };
+		    } else {
+			$CASESPEC{$code} =
+			{ code      => $hexcode,
+			  lower     => $lower,
+			  title     => $title,
+			  upper     => $upper,
+			  condition => $condition };
+		    }
 		}
 	    }
 	    close($CASESPECFH);
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 67e99d0b9d..7536e728e4 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -223,7 +223,7 @@ ok( charinrange($ranges, "13a0"));
 ok( charinrange($ranges, "13f4"));
 ok(!charinrange($ranges, "13f5"));
 
-ok(Unicode::UCD::UnicodeVersion, 3.1);
+ok(Unicode::UCD::UnicodeVersion, '3.1.1');
 
 use Unicode::UCD qw(compexcl);
 
@@ -264,8 +264,8 @@ ok($casespec->{code} eq '00DF' &&
 
 $casespec = casespec(0x307);
 
-ok($casespec->{code} eq '0307' &&
-   $casespec->{lower} eq '0307'  &&
-   $casespec->{title} eq ''  &&
-   $casespec->{upper} eq '' &&
-   $casespec->{condition} eq 'lt AFTER_i');
+ok($casespec->{az}->{code} eq '0307' &&
+   $casespec->{az}->{lower} eq ''  &&
+   $casespec->{az}->{title} eq '0307'  &&
+   $casespec->{az}->{upper} eq '0307' &&
+   $casespec->{az}->{condition} eq 'az AFTER_i NOT_MORE_ABOVE');
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-09-02 12:41:12 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-09-02 12:41:12 +0000
commit	f499c3861b7c35f971fe2027563597c0f31f3f45 (patch)
tree	6beec7481907351a689a4fedb015f371ed945d69
parent	9731f9ce05d1c7e58d4cec595b6014e465a43d2d (diff)
download	perl-f499c3861b7c35f971fe2027563597c0f31f3f45.tar.gz