summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-06-18 12:44:55 -0600
committerKarl Williamson <public@khwilliamson.com>2012-08-02 09:24:52 -0600
commitc4854deaa322105e69b0d84640b741c340711a77 (patch)
tree9ba1b47f23b323a024cf80b1a125b3b972c99e44 /regen
parent92feec867b2013acee7182eaa2b0f076a6e3aaec (diff)
downloadperl-c4854deaa322105e69b0d84640b741c340711a77.tar.gz
regen/mk_invlists: Add mode to generate above-Latin1 only
This change adds the ability to specify that an output inversion list is to contain only those code points that are above Latin-1. Typically, the Latin-1 code points will be accessed by some other means.
Diffstat (limited to 'regen')
-rw-r--r--regen/mk_invlists.pl26
1 files changed, 23 insertions, 3 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 65d6995285..97b0eec38a 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -144,11 +144,13 @@ for my $prop (qw(
# infinity. For example, the Upper property doesn't include the character
# at 255, but does include the one at 256. We don't include the 256 one.
my $lookup_prop = $prop;
- $lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//;
- my @invlist = prop_invlist($lookup_prop);
+ my $l1_only = ($lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//);
+ my $nonl1_only = 0;
+ $nonl1_only = $lookup_prop =~ s/^NonL1// unless $l1_only;
+ my @invlist = prop_invlist($lookup_prop, '_perl_core_internal_ok');
die "Could not find inversion list for '$lookup_prop'" unless @invlist;
- if ($lookup_prop ne $prop) {
+ if ($l1_only) {
for my $i (0 .. @invlist - 1 - 1) {
if ($invlist[$i] > 255) {
@@ -171,6 +173,24 @@ for my $prop (qw(
}
}
}
+ elsif ($nonl1_only) {
+ my $found_nonl1 = 0;
+ for my $i (0 .. @invlist - 1 - 1) {
+ next if $invlist[$i] < 256;
+
+ # Here, we have the first element in the array that indicates an
+ # element above Latin1. Get rid of all previous ones.
+ splice @invlist, 0, $i;
+
+ # If this one's index is not divisible by 2, it means that this
+ # element is inverting away from being in the list, which means
+ # all code points from 256 up to (but not including) this one are
+ # in this list, so the trimmed list must start at 256.
+ unshift @invlist, 256 if $i % 2 != 0;
+ $found_nonl1 = 1;
+ last;
+ }
+ die "No non-Latin1 code points in $lookup_prop" unless $found_nonl1;
+ }
output_invlist($prop, \@invlist);
}