summaryrefslogtreecommitdiff
path: root/regen
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-10-20 15:13:22 -0600
committerKarl Williamson <public@khwilliamson.com>2012-10-20 17:31:49 -0600
commita02047bf497b08d7118eb5d4ae83844031835f80 (patch)
tree3e60aca7f21a0cd831e9e8d0e92a24b3ed9f0bd2 /regen
parentdc2a396bba8663b0124f0368814efac7cc73496e (diff)
downloadperl-a02047bf497b08d7118eb5d4ae83844031835f80.tar.gz
regen/mk_invlists.pl: Generate our own tables for certain properties
The two affected inversion lists are used only in regen. It is wasteful to have mktables generate these, as they aren't used elsewhere and just take up disk space.
Diffstat (limited to 'regen')
-rw-r--r--regen/mk_invlists.pl78
1 files changed, 72 insertions, 6 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index b9be301c00..e61104d0c2 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -2,7 +2,7 @@
use 5.015;
use strict;
use warnings;
-use Unicode::UCD "prop_invlist";
+use Unicode::UCD qw(prop_invlist prop_invmap);
require 'regen/regen_lib.pl';
# This program outputs charclass_invlists.h, which contains various inversion
@@ -73,6 +73,60 @@ sub output_invlist ($$) {
print $out_fh "};\n";
}
+sub mk_invlist_from_cp_list {
+
+ # Returns an inversion list constructed from the sorted input array of
+ # code points
+
+ my $list_ref = shift;
+
+ # Initialize to just the first element
+ my @invlist = ( $list_ref->[0], $list_ref->[0] + 1);
+
+ # For each succeeding element, if it extends the previous range, adjust
+ # up, otherwise add it.
+ for my $i (1 .. @$list_ref - 1) {
+ if ($invlist[-1] == $list_ref->[$i]) {
+ $invlist[-1]++;
+ }
+ else {
+ push @invlist, $list_ref->[$i], $list_ref->[$i] + 1;
+ }
+ }
+ return @invlist;
+}
+
+# Read in the Case Folding rules, and construct arrays of code points for the
+# properties we need.
+my ($cp_ref, $folds_ref, $format) = prop_invmap("Case_Folding");
+die "Could not find inversion map for Case_Folding" unless defined $format;
+die "Incorrect format '$format' for Case_Folding inversion map"
+ unless $format eq 'al';
+my @has_multi_char_fold;
+my @is_non_final_fold;
+
+for my $i (0 .. @$folds_ref - 1) {
+ next unless ref $folds_ref->[$i]; # Skip single-char folds
+ push @has_multi_char_fold, $cp_ref->[$i];
+
+ # Add to the the non-finals list each code point that is in a non-final
+ # position
+ for my $j (0 .. @{$folds_ref->[$i]} - 2) {
+ push @is_non_final_fold, $folds_ref->[$i][$j]
+ unless grep { $folds_ref->[$i][$j] == $_ } @is_non_final_fold;
+ }
+}
+
+sub _Perl_Multi_Char_Folds {
+ @has_multi_char_fold = sort { $a <=> $b } @has_multi_char_fold;
+ return mk_invlist_from_cp_list(\@has_multi_char_fold);
+}
+
+sub _Perl_Non_Final_Folds {
+ @is_non_final_fold = sort { $a <=> $b } @is_non_final_fold;
+ return mk_invlist_from_cp_list(\@is_non_final_fold);
+}
+
output_invlist("Latin1", [ 0, 256 ]);
output_invlist("AboveLatin1", [ 256 ]);
@@ -95,6 +149,9 @@ output_invlist("AboveLatin1", [ 256 ]);
# In the list of properties below that get generated, the L1 prefix is a fake
# property that means just the Latin1 range of the full property (whose name
# has an X prefix instead of L1).
+#
+# An initial & means to use the subroutine from this file instead of an
+# official inversion list.
for my $prop (qw(
ASCII
@@ -127,8 +184,8 @@ for my $prop (qw(
L1PosixWord
PosixXDigit
XPosixXDigit
- NonL1_Perl_Non_Final_Folds
- _Perl_Multi_Char_Folds
+ &NonL1_Perl_Non_Final_Folds
+ &_Perl_Multi_Char_Folds
)
) {
@@ -145,11 +202,20 @@ for my $prop (qw(
# start a new range above 255, as that could be construed as going to
# infinity. For example, the Upper property doesn't include the character
# at 255, but does include the one at 256. We don't include the 256 one.
- my $lookup_prop = $prop;
+ my $prop_name = $prop;
+ my $is_local_sub = $prop_name =~ s/^&//;
+ my $lookup_prop = $prop_name;
my $l1_only = ($lookup_prop =~ s/^L1Posix/XPosix/ or $lookup_prop =~ s/^L1//);
my $nonl1_only = 0;
$nonl1_only = $lookup_prop =~ s/^NonL1// unless $l1_only;
- my @invlist = prop_invlist($lookup_prop, '_perl_core_internal_ok');
+
+ my @invlist;
+ if ($is_local_sub) {
+ @invlist = eval $lookup_prop;
+ }
+ else {
+ @invlist = prop_invlist($lookup_prop, '_perl_core_internal_ok');
+ }
die "Could not find inversion list for '$lookup_prop'" unless @invlist;
if ($l1_only) {
@@ -194,7 +260,7 @@ for my $prop (qw(
die "No non-Latin1 code points in $lookup_prop" unless $found_nonl1;
}
- output_invlist($prop, \@invlist);
+ output_invlist($prop_name, \@invlist);
}
read_only_bottom_close_and_rename($out_fh)