summary | refs | log | tree | commit | diff
path: root/regen
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2015-06-24 12:37:55 -0600
committer: Karl Williamson <khw@cpan.org> 2015-07-28 22:15:53 -0600
commit: 377a58578acd4376703a673bb8df01ce912cce1d (patch)
tree: 103a8cb2e8be0e9d94592d1c8434ade78e5fae62 /regen
parent: 37383e23159ff2a28bdeba94e6d4757968819d28 (diff)
download: perl-377a58578acd4376703a673bb8df01ce912cce1d.tar.gz
regen/mk_PL_charclass.pl: Don't confuse simple with multi folds
On early Unicode releases, this script incorrectly reported that a character had a simple fold from above Latin1 when it actually did not. The cause was that the simple (single-character) folds were not kept separate from the multi-character ones. The solution is to maintain a separate data structure for the simple folds.
Diffstat (limited to 'regen')
-rw-r--r-- regen/mk_PL_charclass.pl 18
1 file changed, 15 insertions, 3 deletions
diff --git a/regen/mk_PL_charclass.pl b/regen/mk_PL_charclass.pl
index 6a0743bb56..17280fd80f 100644
--- a/regen/mk_PL_charclass.pl
+++ b/regen/mk_PL_charclass.pl
@@ -51,6 +51,7 @@ my @properties = qw(
# Read in the case fold mappings.
my %folded_closure;
+my %simple_folded_closure;
my @hex_non_final_folds;
my @non_latin1_simple_folds;
my @folds;
@@ -118,8 +119,14 @@ BEGIN { # Have to do this at compile time because using user-defined \p{property
for my $i (0 .. @folded - 1) {
my $hex_fold = $folded[$i];
my $fold = hex $hex_fold;
- push @{$folded_closure{$fold}}, $from if $fold < 256;
- push @{$folded_closure{$from}}, $fold if $from < 256;
+ if ($fold < 256) {
+ push @{$folded_closure{$fold}}, $from;
+ push @{$simple_folded_closure{$fold}}, $from if $fold_type ne 'F';
+ }
+ if ($from < 256) {
+ push @{$folded_closure{$from}}, $fold;
+ push @{$simple_folded_closure{$from}}, $fold if $fold_type ne 'F';
+ }
if (($fold_type eq 'C' || $fold_type eq 'S')
&& ($fold < 256 != $from < 256))
@@ -153,11 +160,16 @@ BEGIN { # Have to do this at compile time because using user-defined \p{property
push @{$folded_closure{$from}}, @{$folded_closure{$folded}};
}
}
+ foreach my $folded (keys %simple_folded_closure) {
+ foreach my $from (grep { $_ < 256 } @{$simple_folded_closure{$folded}}) {
+ push @{$simple_folded_closure{$from}}, @{$simple_folded_closure{$folded}};
+ }
+ }
# We have the single-character folds that cross the 255/256, like KELVIN
# SIGN => 'k', but we need the closure, so add like 'K' to it
foreach my $folded (@non_latin1_simple_folds) {
- foreach my $fold (@{$folded_closure{$folded}}) {
+ foreach my $fold (@{$simple_folded_closure{$folded}}) {
if ($fold < 256 && ! grep { $fold == $_ } @non_latin1_simple_folds) {
push @non_latin1_simple_folds, $fold;
}