diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-06-18 12:44:55 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-08-02 09:24:52 -0600 |
commit | c4854deaa322105e69b0d84640b741c340711a77 (patch) | |
tree | 9ba1b47f23b323a024cf80b1a125b3b972c99e44 /regen | |
parent | 92feec867b2013acee7182eaa2b0f076a6e3aaec (diff) | |
download | perl-c4854deaa322105e69b0d84640b741c340711a77.tar.gz |
regen/mk_invlists: Add mode to generate above-Latin1 only
This change adds the ability to specify that an output inversion list is
to contain only those code points that are above Latin-1. Typically,
the Latin-1 ones will be accessed by some other means.
Diffstat (limited to 'regen')
-rw-r--r-- | regen/mk_invlists.pl | 26 |
1 file changed, 23 insertions, 3 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 65d6995285..97b0eec38a 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -144,11 +144,13 @@ for my $prop (qw(
     # infinity. For example, the Upper property doesn't include the character
     # at 255, but does include the one at 256. We don't include the 256 one.
     my $lookup_prop = $prop;
-    $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//;
-    my @invlist = prop_invlist($lookup_prop);
+    my $l1_only = ($lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//);
+    my $nonl1_only = 0;
+    $nonl1_only = $lookup_prop =~ s/^NonL1// unless $l1_only;
+    my @invlist = prop_invlist($lookup_prop, '_perl_core_internal_ok');
     die "Could not find inversion list for '$lookup_prop'" unless @invlist;
 
-    if ($lookup_prop ne $prop) {
+    if ($l1_only) {
         for my $i (0 .. @invlist - 1 - 1) {
             if ($invlist[$i] > 255) {
 
@@ -171,6 +173,24 @@ for my $prop (qw(
             }
         }
     }
+    elsif ($nonl1_only) {
+        my $found_nonl1 = 0;
+        for my $i (0 .. @invlist - 1 - 1) {
+            next if $invlist[$i] < 256;
+
+            # Here, we have the first element in the array that indicates an
+            # element above Latin1. Get rid of all previous ones.
+            splice @invlist, 0, $i;
+
+            # If this one's index is not divisible by 2, it means that this
+            # element is inverting away from being in the list, which means
+            # all code points from 256 to this one are in this list.
+            unshift @invlist, 256 if $i % 2 != 0;
+            $found_nonl1 = 1;
+            last;
+        }
+        die "No non-Latin1 code points in $lookup_prop" unless $found_nonl1;
+    }
 
     output_invlist($prop, \@invlist);
 }