diff options
author | Karl Williamson <khw@cpan.org> | 2020-04-03 12:12:06 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2020-10-16 07:01:41 -0600 |
commit | 114fc8b6cf6259d91d5d2c5cf7509f3f5e8cf35b (patch) | |
tree | cf524c703ebcc123d0024a0e5e40b6bf91ba70d5 /regen/regcharclass_multi_char_folds.pl | |
parent | 70dc0cf11d00e208b9cf7abd3d31a83e245d2b5c (diff) | |
download | perl-114fc8b6cf6259d91d5d2c5cf7509f3f5e8cf35b.tar.gz |
regen/regcharclass_multi_char_folds.pl: Use case fold
Prior to this commit, only the upper case of Latin1 characters was dealt
with. But we really want case folding, and there are a few other
characters that fold to Latin1. This commit acknowledges them.
Diffstat (limited to 'regen/regcharclass_multi_char_folds.pl')
-rw-r--r-- | regen/regcharclass_multi_char_folds.pl | 21 |
1 file changed, 18 insertions, 3 deletions
```diff
diff --git a/regen/regcharclass_multi_char_folds.pl b/regen/regcharclass_multi_char_folds.pl
index 8cf9837397..a72e1497ce 100644
--- a/regen/regcharclass_multi_char_folds.pl
+++ b/regen/regcharclass_multi_char_folds.pl
@@ -73,6 +73,19 @@ sub multi_char_folds ($$) {
     die "Incorrect format '$format' for Case_Folding inversion map"
                                                         unless $format eq 'al';
 
+    my %inverse_latin1_folds;
+    for my $i (0 .. @$cp_ref - 1) {
+        next if ref $folds_ref->[$i];   # multi-char fold
+        next if $folds_ref->[$i] == 0;  # Not folded
+        my $cp_base = $cp_ref->[$i];
+
+        for my $j ($cp_base .. $cp_ref->[$i+1] - 1) {
+            my $folded_base = $folds_ref->[$i];
+            next if $folded_base > 255;         # only interested in Latin1
+            push @{$inverse_latin1_folds{$folded_base + $j - $cp_base}}, $j;
+        }
+    }
+
     my @folds;
     my @output_folds;
 
@@ -118,9 +131,11 @@ sub multi_char_folds ($$) {
             my $this_ord = $this_fold_ref->[$j];
             undef $this_fold_ref->[$j];
 
-            if ($this_ord < 256 && chr($this_ord) =~ /\p{Cased}/) {
-                my $uc = ord(uc(chr($this_ord)));
-                @{$this_fold_ref->[$j]} = ( $this_ord, $uc);
+            # If the fold is to a Latin1-range cased letter, replace the entry
+            # with an array which also includes everything that folds to it.
+            if (exists $inverse_latin1_folds{$this_ord}) {
+                push @{$this_fold_ref->[$j]},
+                      ( $this_ord, @{$inverse_latin1_folds{$this_ord}} );
             }
             else {  # Otherwise, just itself. (gen_combinations() needs a ref)
                 @{$this_fold_ref->[$j]} = ( $this_ord );
```