summary refs log tree commit diff
path: root/regen
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2013-02-11 12:07:00 -0700
committer Karl Williamson <public@khwilliamson.com> 2013-07-03 19:21:16 -0600
commit 533c4e2f08b42d977e5004e823d4849f7473d2d0 (patch)
tree b5b9d1f80b1b1069c67bc7467ea61c34ca396df1 /regen
parent ccbd2626ed2f48a52d8b29a6c007fb3736570c9e (diff)
download perl-533c4e2f08b42d977e5004e823d4849f7473d2d0.tar.gz
regcomp.c: Add a constant 0 element before inversion lists
This commit is the first step to separating the header from the body of inversion lists. Doing so will allow the compiled-in inversion lists to be fully read-only. To invert an inversion list, one simply unshifts a 0 to the front of it if one is not there, and shifts off the 0 if it does have one. The current data structure reserves an element at the beginning of each inversion list that is either 0 or 1. If 0, it means the inversion list begins there; if 1, it means the inversion list starts at the next element. Inverting involves flipping this bit. This commit changes the structure so that there is an additional element just after the element that flips. This new element is always 0, and the flipping element now says whether the inversion list begins at the constant 0 element, or the one after that. Doing this allows the flipping element to be separated in later commits from the body of the inversion list, which will always begin with the constant 0 element. That means that the body of the inversion list can be const.
Diffstat (limited to 'regen')
-rw-r--r-- regen/mk_invlists.pl 27
1 file changed, 9 insertions, 18 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 67b6e417fc..27c080233c 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -15,7 +15,7 @@ require 'regen/regen_lib.pl';
# in the headers is used to minimize the possibility of things getting
# out-of-sync, or the wrong data structure being passed. Currently that
# random number is:
-my $VERSION_DATA_STRUCTURE_TYPE = 290655244;
+my $VERSION_DATA_STRUCTURE_TYPE = 1039476070;
my $out_fh = open_new('charclass_invlists.h', '>',
{style => '*', by => $0,
@@ -36,27 +36,18 @@ sub output_invlist ($$) {
# Output the inversion list $invlist using the name $name for it.
# It is output in the exact internal form for inversion lists.
- my $zero_or_one; # Is the last element of the header 0, or 1 ?
-
- # If the first element is 0, it goes in the header, instead of the body
- if ($invlist->[0] == 0) {
- shift @$invlist;
-
- $zero_or_one = 0;
-
- # Add a dummy 0 at the end so that the length is constant. inversion
- # lists are always stored with enough room so that if they change from
- # beginning with 0, they don't have to grow.
- push @$invlist, 0;
- }
- else {
+ # Is the last element of the header 0, or 1 ?
+ my $zero_or_one = 0;
+ my $count = @$invlist;
+ if ($invlist->[0] != 0) {
+ unshift @$invlist, 0;
$zero_or_one = 1;
}
print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name};
print $out_fh "\nstatic UV ${name}_invlist[] = {\n";
- print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
+ print $out_fh "\t$count,\t/* Number of elements */\n";
# This should be UV_MAX, but I (khw) am not confident that the suffixes
# for specifying the constant are portable, e.g. 'ull' on a 32 bit
@@ -65,8 +56,8 @@ sub output_invlist ($$) {
print $out_fh "\t0,\t/* Cache of previous search index result */\n";
print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n";
print $out_fh "\t", $zero_or_one,
- ",\t/* 0 if this is the first element of the list proper;",
- "\n\t\t 1 if the next element is the first */\n";
+ ",\t/* 0 if the list starts at 0;",
+ "\n\t\t 1 if it starts at the element beyond 0 */\n";
# The main body are the UVs passed in to this routine. Do the final
# element separately