summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2013-07-16 13:59:03 -0600
committer Karl Williamson <public@khwilliamson.com> 2013-07-16 13:59:03 -0600
commit 49fb45ddc8d9f3f37c5080633e16ae291297ddc2 (patch)
tree 6fe559069a877628fbbab7873bcfd09fa539b6e0
parent d212d2222867b5b63a5f6d8c3243d0d45a26988c (diff)
parent 4c60406d0e89efe615812945449c96f48bf70433 (diff)
download perl-49fb45ddc8d9f3f37c5080633e16ae291297ddc2.tar.gz
Merge branch 'const_posix_invlists' into blead
This is the second attempt to put this functionality into blead. The first was in commit dab1d6f279e5792c6a935eeaeeec652a883df979. Its elements had to be backed out because it turned out there were array bounds errors. This new commit should have fixed those. The revised commit message is: What characters certain POSIX classes match, like [[:xdigit:]], are compiled into a C header file, thus avoiding the necessity of reading them in from disk at run-time. This merge makes those fully const, so that they can get loaded as part of a read-only text segment. The SVs that contain these are set so that SvLEN is 0; this means that copies are not made when they are dup'd, such as when threads are created. A new svtype is created for inversion lists, using the single available slot and renumbering the svtypes. The first few commits instead use an existing svtype, repurposing some of its fields for use by inversion lists. This was done so that the change could go into a maintenance release, if necessary. (Their not being fully const can interfere with -DPERL_GLOBAL_STRUCT_PRIVATE.) Also, should it become necessary to create an svtype for some other purpose, we can revert to that point in the branch.
-rw-r--r-- charclass_invlists.h 446
-rw-r--r-- dist/Storable/Storable.xs 2
-rw-r--r-- dump.c 7
-rw-r--r-- embed.fnc 20
-rw-r--r-- embed.h 6
-rw-r--r-- ext/B/B.xs 2
-rw-r--r-- inline_invlist.c 54
-rw-r--r-- perl.h 15
-rw-r--r-- pod/perlguts.pod 5
-rw-r--r-- proto.h 34
-rw-r--r-- regcomp.c 328
-rw-r--r-- regen/mk_invlists.pl 35
-rw-r--r-- sv.c 21
-rw-r--r-- sv.h 27
14 files changed, 444 insertions, 558 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index b5d71afb16..f3de65ab7a 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -9,28 +9,25 @@
#ifndef PERL_IN_XSUB_RE
-static UV Latin1_invlist[] = {
+static const UV Latin1_invlist[] = {
2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
- 256,
- 0
+ 148565664, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
+ 256
};
#endif
#ifndef PERL_IN_XSUB_RE
-static UV AboveLatin1_invlist[] = {
- 1, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV AboveLatin1_invlist[] = {
+ 2, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
256
};
@@ -38,28 +35,25 @@ static UV AboveLatin1_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV ASCII_invlist[] = {
+static const UV ASCII_invlist[] = {
2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
- 128,
- 0
+ 148565664, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
+ 128
};
#endif
#ifndef PERL_IN_XSUB_RE
-static UV L1Cased_invlist[] = {
- 16, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1Cased_invlist[] = {
+ 17, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -82,13 +76,12 @@ static UV L1Cased_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV VertSpace_invlist[] = {
- 6, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV VertSpace_invlist[] = {
+ 7, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
10,
14,
133,
@@ -101,13 +94,12 @@ static UV VertSpace_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PerlSpace_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PerlSpace_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -118,13 +110,12 @@ static UV PerlSpace_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV XPerlSpace_invlist[] = {
- 22, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV XPerlSpace_invlist[] = {
+ 23, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -153,13 +144,12 @@ static UV XPerlSpace_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixAlnum_invlist[] = {
- 6, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixAlnum_invlist[] = {
+ 7, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -172,13 +162,12 @@ static UV PosixAlnum_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixAlnum_invlist[] = {
- 18, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixAlnum_invlist[] = {
+ 19, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -203,13 +192,12 @@ static UV L1PosixAlnum_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixAlpha_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixAlpha_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -220,13 +208,12 @@ static UV PosixAlpha_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixAlpha_invlist[] = {
- 16, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixAlpha_invlist[] = {
+ 17, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -249,13 +236,12 @@ static UV L1PosixAlpha_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixBlank_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixBlank_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
10,
32,
@@ -266,13 +252,12 @@ static UV PosixBlank_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV XPosixBlank_invlist[] = {
- 18, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV XPosixBlank_invlist[] = {
+ 19, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
10,
32,
@@ -297,47 +282,42 @@ static UV XPosixBlank_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixCntrl_invlist[] = {
+static const UV PosixCntrl_invlist[] = {
4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 148565664, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
- 128,
- 0
+ 128
};
#endif
#ifndef PERL_IN_XSUB_RE
-static UV XPosixCntrl_invlist[] = {
+static const UV XPosixCntrl_invlist[] = {
4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 148565664, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
- 160,
- 0
+ 160
};
#endif
#ifndef PERL_IN_XSUB_RE
-static UV PosixDigit_invlist[] = {
- 2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixDigit_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58
};
@@ -346,13 +326,12 @@ static UV PosixDigit_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixGraph_invlist[] = {
- 2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixGraph_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
127
};
@@ -361,13 +340,12 @@ static UV PosixGraph_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixGraph_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixGraph_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
127,
161,
@@ -378,13 +356,12 @@ static UV L1PosixGraph_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixLower_invlist[] = {
- 2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixLower_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
97,
123
};
@@ -393,13 +370,12 @@ static UV PosixLower_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixLower_invlist[] = {
- 12, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixLower_invlist[] = {
+ 13, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
97,
123,
170,
@@ -418,13 +394,12 @@ static UV L1PosixLower_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixPrint_invlist[] = {
- 2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixPrint_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127
};
@@ -433,13 +408,12 @@ static UV PosixPrint_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixPrint_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixPrint_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
160,
@@ -450,13 +424,12 @@ static UV L1PosixPrint_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixPunct_invlist[] = {
- 8, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixPunct_invlist[] = {
+ 9, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
48,
58,
@@ -471,13 +444,12 @@ static UV PosixPunct_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixPunct_invlist[] = {
- 20, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixPunct_invlist[] = {
+ 21, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
48,
58,
@@ -504,13 +476,12 @@ static UV L1PosixPunct_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixSpace_invlist[] = {
- 4, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixSpace_invlist[] = {
+ 5, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -521,13 +492,12 @@ static UV PosixSpace_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV XPosixSpace_invlist[] = {
- 22, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV XPosixSpace_invlist[] = {
+ 23, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -556,13 +526,12 @@ static UV XPosixSpace_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixUpper_invlist[] = {
- 2, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixUpper_invlist[] = {
+ 3, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91
};
@@ -571,13 +540,12 @@ static UV PosixUpper_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixUpper_invlist[] = {
- 6, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixUpper_invlist[] = {
+ 7, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
192,
@@ -590,13 +558,12 @@ static UV L1PosixUpper_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixWord_invlist[] = {
- 8, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixWord_invlist[] = {
+ 9, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -611,13 +578,12 @@ static UV PosixWord_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV L1PosixWord_invlist[] = {
- 20, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV L1PosixWord_invlist[] = {
+ 21, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -644,13 +610,12 @@ static UV L1PosixWord_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV PosixXDigit_invlist[] = {
- 6, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV PosixXDigit_invlist[] = {
+ 7, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -663,13 +628,12 @@ static UV PosixXDigit_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV XPosixXDigit_invlist[] = {
- 12, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV XPosixXDigit_invlist[] = {
+ 13, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -686,13 +650,12 @@ static UV XPosixXDigit_invlist[] = {
#endif
-static UV NonL1_Perl_Non_Final_Folds_invlist[] = {
- 44, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV NonL1_Perl_Non_Final_Folds_invlist[] = {
+ 45, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
700,
701,
776,
@@ -741,13 +704,12 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = {
#ifndef PERL_IN_XSUB_RE
-static UV _Perl_Multi_Char_Folds_invlist[] = {
- 58, /* Number of elements */
- 0, /* Current iteration position */
- 0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+static const UV _Perl_Multi_Char_Folds_invlist[] = {
+ 59, /* Number of elements */
+ 148565664, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
223,
224,
304,
diff --git a/dist/Storable/Storable.xs b/dist/Storable/Storable.xs
index 9cba279c45..f0cfceabeb 100644
--- a/dist/Storable/Storable.xs
+++ b/dist/Storable/Storable.xs
@@ -3513,7 +3513,7 @@ static int sv_type(pTHX_ SV *sv)
case SVt_PVCV:
return svis_CODE;
#if PERL_VERSION > 8
- /* case SVt_DUMMY: */
+ /* case SVt_INVLIST: */
#endif
default:
break;
diff --git a/dump.c b/dump.c
index 5ca838b043..6ba4fd2425 100644
--- a/dump.c
+++ b/dump.c
@@ -27,13 +27,12 @@
#include "perl.h"
#include "regcomp.h"
-
static const char* const svtypenames[SVt_LAST] = {
"NULL",
- "DUMMY",
"IV",
"NV",
"PV",
+ "INVLIST",
"PVIV",
"PVNV",
"PVMG",
@@ -50,10 +49,10 @@ static const char* const svtypenames[SVt_LAST] = {
static const char* const svshorttypenames[SVt_LAST] = {
"UNDEF",
- "DUMMY",
"IV",
"NV",
"PV",
+ "INVLST",
"PVIV",
"PVNV",
"PVMG",
@@ -2798,7 +2797,7 @@ Perl_sv_xmlpeek(pTHX_ SV *sv)
case SVt_PVGV:
sv_catpv(t, " GV=\"");
break;
- case SVt_DUMMY:
+ case SVt_INVLIST:
sv_catpv(t, " DUMMY=\"");
break;
case SVt_REGEXP:
diff --git a/embed.fnc b/embed.fnc
index bd78b71f74..a9e4215380 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1088,8 +1088,8 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \
|NN const struct regnode *node|bool doinit \
|NULLOK SV **listsvp|NULLOK SV **altsvp
#ifdef PERL_IN_REGCOMP_C
-EMsR |SV* |_new_invlist_C_array|NN UV* list
-: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|bool complement_b
+EMsR |SV* |_new_invlist_C_array|NN const UV* const list
+: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b
#endif
Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
@@ -1434,17 +1434,15 @@ EsM |void |_append_range_to_invlist |NN SV* const invlist|const UV start|const
EiMR |UV* |_invlist_array_init |NN SV* const invlist|const bool will_have_0
EiMR |UV* |invlist_array |NN SV* const invlist
EsM |void |invlist_extend |NN SV* const invlist|const UV len
-EiMR |UV* |get_invlist_zero_addr |NN SV* invlist
EiMR |UV |invlist_max |NN SV* const invlist
-EiM |void |invlist_set_len|NN SV* const invlist|const UV len
+EiM |void |invlist_set_len|NN SV* const invlist|const UV len|const bool offset
EiMR |IV* |get_invlist_previous_index_addr|NN SV* invlist
EiMR |IV |invlist_previous_index|NN SV* const invlist
EiM |void |invlist_set_previous_index|NN SV* const invlist|const IV index
EiM |void |invlist_trim |NN SV* const invlist
EiMR |SV* |invlist_clone |NN SV* const invlist
EiMR |bool |invlist_is_iterating|NN SV* const invlist
-EiMR |UV* |get_invlist_iter_addr |NN SV* invlist
-EiMR |UV* |get_invlist_version_id_addr |NN SV* invlist
+EiMR |STRLEN*|get_invlist_iter_addr |NN SV* invlist
EiM |void |invlist_iterinit|NN SV* invlist
EsMR |bool |invlist_iternext|NN SV* invlist|NN UV* start|NN UV* end
EiM |void |invlist_iterfinish|NN SV* invlist
@@ -1452,9 +1450,13 @@ EiMR |UV |invlist_highest|NN SV* const invlist
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
-EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
+EXpM |void |_invlist_intersection_maybe_complement_2nd \
+ |NULLOK SV* const a|NN SV* const b \
+ |const bool complement_b|NN SV** i
EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
-EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
+EXpM |void |_invlist_union_maybe_complement_2nd \
+ |NULLOK SV* const a|NN SV* const b \
+ |const bool complement_b|NN SV** output
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXpM |void |_invlist_invert_prop|NN SV* const invlist
@@ -1470,7 +1472,7 @@ EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
EXMpR |SV* |_invlist_contents|NN SV* const invlist
-EiMR |UV* |_get_invlist_len_addr |NN SV* invlist
+EiMR |bool* |get_invlist_offset_addr|NN SV* invlist
EiMR |UV |_invlist_len |NN SV* const invlist
EMiR |bool |_invlist_contains_cp|NN SV* const invlist|const UV cp
EXpMR |IV |_invlist_search |NN SV* const invlist|const UV cp
diff --git a/embed.h b/embed.h
index 58b7b357b6..8e9b059a19 100644
--- a/embed.h
+++ b/embed.h
@@ -908,8 +908,6 @@
#define could_it_be_a_POSIX_class(a) S_could_it_be_a_POSIX_class(aTHX_ a)
#define get_invlist_iter_addr(a) S_get_invlist_iter_addr(aTHX_ a)
#define get_invlist_previous_index_addr(a) S_get_invlist_previous_index_addr(aTHX_ a)
-#define get_invlist_version_id_addr(a) S_get_invlist_version_id_addr(aTHX_ a)
-#define get_invlist_zero_addr(a) S_get_invlist_zero_addr(aTHX_ a)
#define grok_bslash_N(a,b,c,d,e,f,g) S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
#define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e)
#define invlist_array(a) S_invlist_array(aTHX_ a)
@@ -922,7 +920,7 @@
#define invlist_iternext(a,b,c) S_invlist_iternext(aTHX_ a,b,c)
#define invlist_max(a) S_invlist_max(aTHX_ a)
#define invlist_previous_index(a) S_invlist_previous_index(aTHX_ a)
-#define invlist_set_len(a,b) S_invlist_set_len(aTHX_ a,b)
+#define invlist_set_len(a,b,c) S_invlist_set_len(aTHX_ a,b,c)
#define invlist_set_previous_index(a,b) S_invlist_set_previous_index(aTHX_ a,b)
#define invlist_trim(a) S_invlist_trim(aTHX_ a)
#define join_exact(a,b,c,d,e,f,g) S_join_exact(aTHX_ a,b,c,d,e,f,g)
@@ -950,13 +948,13 @@
#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
-#define _get_invlist_len_addr(a) S__get_invlist_len_addr(aTHX_ a)
#define _get_swash_invlist(a) Perl__get_swash_invlist(aTHX_ a)
#define _invlist_contains_cp(a,b) S__invlist_contains_cp(aTHX_ a,b)
#define _invlist_contents(a) Perl__invlist_contents(aTHX_ a)
#define _invlist_len(a) S__invlist_len(aTHX_ a)
#define _invlist_search(a,b) Perl__invlist_search(aTHX_ a,b)
#define _swash_inversion_hash(a) Perl__swash_inversion_hash(aTHX_ a)
+#define get_invlist_offset_addr(a) S_get_invlist_offset_addr(aTHX_ a)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
#define _core_swash_init(a,b,c,d,e,f,g) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g)
diff --git a/ext/B/B.xs b/ext/B/B.xs
index fbe6be6719..e7049f04c2 100644
--- a/ext/B/B.xs
+++ b/ext/B/B.xs
@@ -21,13 +21,13 @@ typedef FILE * InputStream;
static const char* const svclassnames[] = {
"B::NULL",
- "B::BIND",
"B::IV",
"B::NV",
#if PERL_VERSION <= 10
"B::RV",
#endif
"B::PV",
+ "B::INVLIST",
"B::PVIV",
"B::PVNV",
"B::PVMG",
diff --git a/inline_invlist.c b/inline_invlist.c
index b56ce60002..470659bea8 100644
--- a/inline_invlist.c
+++ b/inline_invlist.c
@@ -8,47 +8,24 @@
#if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
-#define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */
-#define INVLIST_ITER_OFFSET 1 /* Current iteration position */
-#define INVLIST_PREVIOUS_INDEX_OFFSET 2 /* Place to cache index of previous
- result */
-
-/* This is a combination of a version and data structure type, so that one
- * being passed in can be validated to be an inversion list of the correct
- * vintage. When the structure of the header is changed, a new random number
- * in the range 2**31-1 should be generated and the new() method changed to
- * insert that at this location. Then, if an auxiliary program doesn't change
- * correspondingly, it will be discovered immediately */
-#define INVLIST_VERSION_ID_OFFSET 3
-#define INVLIST_VERSION_ID 290655244
-
-/* For safety, when adding new elements, remember to #undef them at the end of
- * the inversion list code section */
-
-#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
- * contains the code point U+00000, and begins here. If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it. By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
-
/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
* etc */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-PERL_STATIC_INLINE UV*
-S__get_invlist_len_addr(pTHX_ SV* invlist)
+/* This converts to/from our UVs to what the SV code is expecting: bytes. */
+#define TO_INTERNAL_SIZE(x) ((x) * sizeof(UV))
+#define FROM_INTERNAL_SIZE(x) ((x)/ sizeof(UV))
+
+PERL_STATIC_INLINE bool*
+S_get_invlist_offset_addr(pTHX_ SV* invlist)
{
- /* Return the address of the UV that contains the current number
- * of used elements in the inversion list */
+ /* Return the address of the field that says whether the inversion list is
+ * offset (it contains 1) or not (contains 0) */
- PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR;
+ PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR;
- return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
+ return &(((XINVLIST*) SvANY(invlist))->is_offset);
}
PERL_STATIC_INLINE UV
@@ -59,7 +36,9 @@ S__invlist_len(pTHX_ SV* const invlist)
PERL_ARGS_ASSERT__INVLIST_LEN;
- return *_get_invlist_len_addr(invlist);
+ return (SvCUR(invlist) == 0)
+ ? 0
+ : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist);
}
PERL_STATIC_INLINE bool
@@ -74,4 +53,11 @@ S__invlist_contains_cp(pTHX_ SV* const invlist, const UV cp)
return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
}
+# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGEXEC_C)
+
+/* These symbols are only needed later in regcomp.c */
+# undef TO_INTERNAL_SIZE
+# undef FROM_INTERNAL_SIZE
+# endif
+
#endif
diff --git a/perl.h b/perl.h
index bc8388ff53..cfcf87177c 100644
--- a/perl.h
+++ b/perl.h
@@ -2286,6 +2286,7 @@ typedef struct xpvuv XPVUV;
typedef struct xpvnv XPVNV;
typedef struct xpvmg XPVMG;
typedef struct xpvlv XPVLV;
+typedef struct xpvinvlist XINVLIST;
typedef struct xpvav XPVAV;
typedef struct xpvhv XPVHV;
typedef struct xpvgv XPVGV;
@@ -5051,19 +5052,19 @@ EXTCONST U8 PL_magic_data[256];
#endif
#ifdef DOINIT
- /* NL BD IV NV PV PI PN MG RX GV LV AV HV CV FM IO */
+ /* NL IV NV PV INV PI PN MG RX GV LV AV HV CV FM IO */
EXTCONST bool
-PL_valid_types_IVX[] = { 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };
+PL_valid_types_IVX[] = { 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };
EXTCONST bool
-PL_valid_types_NVX[] = { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };
+PL_valid_types_NVX[] = { 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0 };
EXTCONST bool
-PL_valid_types_PVX[] = { 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1 };
+PL_valid_types_PVX[] = { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1 };
EXTCONST bool
-PL_valid_types_RV[] = { 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1 };
+PL_valid_types_RV[] = { 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1 };
EXTCONST bool
-PL_valid_types_IV_set[] = { 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1 };
+PL_valid_types_IV_set[] = { 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1 };
EXTCONST bool
-PL_valid_types_NV_set[] = { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+PL_valid_types_NV_set[] = { 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
#else
diff --git a/pod/perlguts.pod b/pod/perlguts.pod
index fa05c23560..553d9143f4 100644
--- a/pod/perlguts.pod
+++ b/pod/perlguts.pod
@@ -39,9 +39,8 @@ values that can be loaded: an integer value (IV), an unsigned integer
value (UV), a double (NV), a string (PV), and another scalar (SV).
("PV" stands for "Pointer Value". You might think that it is misnamed
because it is described as pointing only to strings. However, it is
-possible to have it point to other things. For example, inversion
-lists, used in regular expression data structures, are scalars, each
-consisting of an array of UVs which are accessed through PVs. But,
+possible to have it point to other things. For example, it could point
+to an array of UVs. But,
using it for non-strings requires care, as the underlying assumption of
much of the internals is that PVs are just for strings. Often, for
example, a trailing NUL is tacked on automatically. The non-string use
diff --git a/proto.h b/proto.h
index 5c21bf7710..9a6f5ddde2 100644
--- a/proto.h
+++ b/proto.h
@@ -6439,7 +6439,7 @@ PERL_STATIC_INLINE UV* S__invlist_array_init(pTHX_ SV* const invlist, const bool
#define PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT \
assert(invlist)
-STATIC SV* S__new_invlist_C_array(pTHX_ UV* list)
+STATIC SV* S__new_invlist_C_array(pTHX_ const UV* const list)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY \
@@ -6503,7 +6503,7 @@ STATIC bool S_could_it_be_a_POSIX_class(pTHX_ struct RExC_state_t *pRExC_state)
#define PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS \
assert(pRExC_state)
-PERL_STATIC_INLINE UV* S_get_invlist_iter_addr(pTHX_ SV* invlist)
+PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(pTHX_ SV* invlist)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR \
@@ -6515,18 +6515,6 @@ PERL_STATIC_INLINE IV* S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
#define PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR \
assert(invlist)
-PERL_STATIC_INLINE UV* S_get_invlist_version_id_addr(pTHX_ SV* invlist)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_GET_INVLIST_VERSION_ID_ADDR \
- assert(invlist)
-
-PERL_STATIC_INLINE UV* S_get_invlist_zero_addr(pTHX_ SV* invlist)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR \
- assert(invlist)
-
STATIC bool S_grok_bslash_N(pTHX_ struct RExC_state_t *pRExC_state, regnode** nodep, UV *valuep, I32 *flagp, U32 depth, bool in_char_class, const bool strict)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_4);
@@ -6599,7 +6587,7 @@ PERL_STATIC_INLINE IV S_invlist_previous_index(pTHX_ SV* const invlist)
#define PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX \
assert(invlist)
-PERL_STATIC_INLINE void S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
+PERL_STATIC_INLINE void S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_INVLIST_SET_LEN \
assert(invlist)
@@ -6766,12 +6754,6 @@ STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
-PERL_STATIC_INLINE UV* S__get_invlist_len_addr(pTHX_ SV* invlist)
- __attribute__warn_unused_result__
- __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR \
- assert(invlist)
-
PERL_CALLCONV SV* Perl__get_swash_invlist(pTHX_ SV* const swash)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
@@ -6808,6 +6790,12 @@ PERL_CALLCONV HV* Perl__swash_inversion_hash(pTHX_ SV* const swash)
#define PERL_ARGS_ASSERT__SWASH_INVERSION_HASH \
assert(swash)
+PERL_STATIC_INLINE bool* S_get_invlist_offset_addr(pTHX_ SV* invlist)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR \
+ assert(invlist)
+
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C) || defined(PERL_IN_TOKE_C)
PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV* listsv, I32 minbits, I32 none, SV* invlist, U8* const flags_p)
@@ -6862,7 +6850,7 @@ PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start,
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
+PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND \
@@ -6893,7 +6881,7 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
+PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
diff --git a/regcomp.c b/regcomp.c
index e150654ba1..b40425f2fa 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7026,11 +7026,12 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
/* This section of code defines the inversion list object and its methods. The
* interfaces are highly subject to change, so as much as possible is static to
* this file. An inversion list is here implemented as a malloc'd C UV array
- * with some added info that is placed as UVs at the beginning in a header
- * portion. An inversion list for Unicode is an array of code points, sorted
- * by ordinal number. The zeroth element is the first code point in the list.
- * The 1th element is the first element beyond that not in the list. In other
- * words, the first range is
+ * held in an SVt_INVLIST scalar.
+ *
+ * An inversion list for Unicode is an array of code points, sorted by ordinal
+ * number. The zeroth element is the first code point in the list. The 1th
+ * element is the first element beyond that not in the list. In other words,
+ * the first range is
* invlist[0]..(invlist[1]-1)
* The other ranges follow. Thus every element whose index is divisible by two
* marks the beginning of a range that is in the list, and every element not
@@ -7048,9 +7049,9 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
* Taking the complement (inverting) an inversion list is quite simple, if the
* first element is 0, remove it; otherwise add a 0 element at the beginning.
* This implementation reserves an element at the beginning of each inversion
- * list to contain 0 when the list contains 0, and contains 1 otherwise. The
- * actual beginning of the list is either that element if 0, or the next one if
- * 1.
+ * list to always contain 0; there is an additional flag in the header which
+ * indicates if the list begins at the 0, or is offset to begin at the next
+ * element.
*
* More about inversion lists can be found in "Unicode Demystified"
* Chapter 13 by Richard Gillam, published by Addison-Wesley.
@@ -7065,32 +7066,31 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
* should eventually be made public */
/* The header definitions are in F<inline_invlist.c> */
-#define TO_INTERNAL_SIZE(x) (((x) + HEADER_LENGTH) * sizeof(UV))
-#define FROM_INTERNAL_SIZE(x) (((x)/ sizeof(UV)) - HEADER_LENGTH)
-
-#define INVLIST_INITIAL_LEN 10
PERL_STATIC_INLINE UV*
S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
{
/* Returns a pointer to the first element in the inversion list's array.
* This is called upon initialization of an inversion list. Where the
- * array begins depends on whether the list has the code point U+0000
- * in it or not. The other parameter tells it whether the code that
- * follows this call is about to put a 0 in the inversion list or not.
- * The first element is either the element with 0, if 0, or the next one,
- * if 1 */
+ * array begins depends on whether the list has the code point U+0000 in it
+ * or not. The other parameter tells it whether the code that follows this
+ * call is about to put a 0 in the inversion list or not. The first
+ * element is either the element reserved for 0, if TRUE, or the element
+ * after it, if FALSE */
- UV* zero = get_invlist_zero_addr(invlist);
+ bool* offset = get_invlist_offset_addr(invlist);
+ UV* zero_addr = (UV *) SvPVX(invlist);
PERL_ARGS_ASSERT__INVLIST_ARRAY_INIT;
/* Must be empty */
- assert(! *_get_invlist_len_addr(invlist));
+ assert(! _invlist_len(invlist));
+
+ *zero_addr = 0;
/* 1^1 = 0; 1^0 = 1 */
- *zero = 1 ^ will_have_0;
- return zero + *zero;
+ *offset = 1 ^ will_have_0;
+ return zero_addr + *offset;
}
PERL_STATIC_INLINE UV*
@@ -7104,53 +7104,40 @@ S_invlist_array(pTHX_ SV* const invlist)
/* Must not be empty. If these fail, you probably didn't check for <len>
* being non-zero before trying to get the array */
- assert(*_get_invlist_len_addr(invlist));
- assert(*get_invlist_zero_addr(invlist) == 0
- || *get_invlist_zero_addr(invlist) == 1);
-
- /* The array begins either at the element reserved for zero if the
- * list contains 0 (that element will be set to 0), or otherwise the next
- * element (in which case the reserved element will be set to 1). */
- return (UV *) (get_invlist_zero_addr(invlist)
- + *get_invlist_zero_addr(invlist));
+ assert(_invlist_len(invlist));
+
+ /* The very first element always contains zero. The array begins either
+ * there, or if the inversion list is offset, at the element after it.
+ * The offset header field determines which; it contains 0 or 1 to indicate
+ * how much additionally to add */
+ assert(0 == *(SvPVX(invlist)));
+ return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
}
PERL_STATIC_INLINE void
-S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
+S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
{
- /* Sets the current number of elements stored in the inversion list */
+ /* Sets the current number of elements stored in the inversion list.
+ * Updates SvCUR correspondingly */
PERL_ARGS_ASSERT_INVLIST_SET_LEN;
- *_get_invlist_len_addr(invlist) = len;
-
- assert(len <= SvLEN(invlist));
-
- SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
- /* If the list contains U+0000, that element is part of the header,
- * and should not be counted as part of the array. It will contain
- * 0 in that case, and 1 otherwise. So we could flop 0=>1, 1=>0 and
- * subtract:
- * SvCUR_set(invlist,
- * TO_INTERNAL_SIZE(len
- * - (*get_invlist_zero_addr(inv_list) ^ 1)));
- * But, this is only valid if len is not 0. The consequences of not doing
- * this is that the memory allocation code may think that 1 more UV is
- * being used than actually is, and so might do an unnecessary grow. That
- * seems worth not bothering to make this the precise amount.
- *
- * Note that when inverting, SvCUR shouldn't change */
+ SvCUR_set(invlist,
+ (len == 0)
+ ? 0
+ : TO_INTERNAL_SIZE(len + offset));
+ assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
}
PERL_STATIC_INLINE IV*
S_get_invlist_previous_index_addr(pTHX_ SV* invlist)
{
- /* Return the address of the UV that is reserved to hold the cached index
+ /* Return the address of the IV that is reserved to hold the cached index
* */
PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR;
- return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV)));
+ return &(((XINVLIST*) SvANY(invlist))->prev_index);
}
PERL_STATIC_INLINE IV
@@ -7183,22 +7170,11 @@ S_invlist_max(pTHX_ SV* const invlist)
PERL_ARGS_ASSERT_INVLIST_MAX;
+ /* Assumes worst case, in which the 0 element is not counted in the
+ * inversion list, so subtracts 1 for that */
return SvLEN(invlist) == 0 /* This happens under _new_invlist_C_array */
- ? _invlist_len(invlist)
- : FROM_INTERNAL_SIZE(SvLEN(invlist));
-}
-
-PERL_STATIC_INLINE UV*
-S_get_invlist_zero_addr(pTHX_ SV* invlist)
-{
- /* Return the address of the UV that is reserved to hold 0 if the inversion
- * list contains 0. This has to be the last element of the heading, as the
- * list proper starts with either it if 0, or the next element if not.
- * (But we force it to contain either 0 or 1) */
-
- PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR;
-
- return (UV *) (SvPVX(invlist) + (INVLIST_ZERO_OFFSET * sizeof (UV)));
+ ? FROM_INTERNAL_SIZE(SvCUR(invlist)) - 1
+ : FROM_INTERNAL_SIZE(SvLEN(invlist)) - 1;
}
#ifndef PERL_IN_XSUB_RE
@@ -7213,56 +7189,75 @@ Perl__new_invlist(pTHX_ IV initial_size)
SV* new_list;
if (initial_size < 0) {
- initial_size = INVLIST_INITIAL_LEN;
+ initial_size = 10;
}
/* Allocate the initial space */
- new_list = newSV(TO_INTERNAL_SIZE(initial_size));
- invlist_set_len(new_list, 0);
+ new_list = newSV_type(SVt_INVLIST);
- /* Force iterinit() to be used to get iteration to work */
- *get_invlist_iter_addr(new_list) = UV_MAX;
+ /* First 1 is in case the zero element isn't in the list; second 1 is for
+ * trailing NUL */
+ SvGROW(new_list, TO_INTERNAL_SIZE(initial_size + 1) + 1);
+ invlist_set_len(new_list, 0, 0);
- /* This should force a segfault if a method doesn't initialize this
- * properly */
- *get_invlist_zero_addr(new_list) = UV_MAX;
+ /* Force iterinit() to be used to get iteration to work */
+ *get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX;
*get_invlist_previous_index_addr(new_list) = 0;
- *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 5
-# error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
-#endif
return new_list;
}
#endif
STATIC SV*
-S__new_invlist_C_array(pTHX_ UV* list)
+S__new_invlist_C_array(pTHX_ const UV* const list)
{
/* Return a pointer to a newly constructed inversion list, initialized to
* point to <list>, which has to be in the exact correct inversion list
* form, including internal fields. Thus this is a dangerous routine that
- * should not be used in the wrong hands */
+ * should not be used in the wrong hands. The passed in 'list' contains
+ * several header fields at the beginning that are not part of the
+ * inversion list body proper */
+
+ const STRLEN length = (STRLEN) list[0];
+ const UV version_id = list[1];
+ const bool offset = cBOOL(list[2]);
+#define HEADER_LENGTH 3
+ /* If any of the above changes in any way, you must change HEADER_LENGTH
+ * (if appropriate) and regenerate INVLIST_VERSION_ID by running
+ * perl -E 'say int(rand 2**31-1)'
+ */
+#define INVLIST_VERSION_ID 148565664 /* This is a combination of a version and
+ data structure type, so that one being
+ passed in can be validated to be an
+ inversion list of the correct vintage.
+ */
- SV* invlist = newSV_type(SVt_PV);
+ SV* invlist = newSV_type(SVt_INVLIST);
PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
- SvPV_set(invlist, (char *) list);
+ if (version_id != INVLIST_VERSION_ID) {
+ Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
+ }
+
+ /* The generated array passed in includes header elements that aren't part
+ * of the list proper, so start it just after them */
+ SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
+
SvLEN_set(invlist, 0); /* Means we own the contents, and the system
shouldn't touch it */
- SvCUR_set(invlist, TO_INTERNAL_SIZE(_invlist_len(invlist)));
- if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
- Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
- }
+ *(get_invlist_offset_addr(invlist)) = offset;
+
+ /* The 'length' passed to us is the physical number of elements in the
+ * inversion list. But if there is an offset the logical number is one
+ * less than that */
+ invlist_set_len(invlist, length - offset, offset);
- /* Initialize the iteration pointer.
- * XXX This could be done at compile time in charclass_invlists.h, but I
- * (khw) am not confident that the suffixes for specifying the C constant
- * UV_MAX are portable, e.g. 'ull' on a 32 bit machine that is configured
- * to use 64 bits; might need a Configure probe */
+ invlist_set_previous_index(invlist, 0);
+
+ /* Initialize the iteration pointer. */
invlist_iterfinish(invlist);
return invlist;
@@ -7275,7 +7270,9 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
PERL_ARGS_ASSERT_INVLIST_EXTEND;
- SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max));
+ /* Add one to account for the zero element at the beginning which may not
+ * be counted by the calling parameters */
+ SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
}
PERL_STATIC_INLINE void
@@ -7285,7 +7282,6 @@ S_invlist_trim(pTHX_ SV* const invlist)
/* Change the length of the inversion list to how many entries it currently
* has */
-
SvPV_shrink_to_cur((SV *) invlist);
}
@@ -7301,11 +7297,13 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
UV* array;
UV max = invlist_max(invlist);
UV len = _invlist_len(invlist);
+ bool offset;
PERL_ARGS_ASSERT__APPEND_RANGE_TO_INVLIST;
if (len == 0) { /* Empty lists must be initialized */
- array = _invlist_array_init(invlist, start == 0);
+ offset = start != 0;
+ array = _invlist_array_init(invlist, ! offset);
}
else {
/* Here, the existing list is non-empty. The current max entry in the
@@ -7328,6 +7326,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
* value not in the set, it is extending the set, so the new first
* value not in the set is one greater than the newly extended range.
* */
+ offset = *get_invlist_offset_addr(invlist);
if (array[final_element] == start) {
if (end != UV_MAX) {
array[final_element] = end + 1;
@@ -7335,7 +7334,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
else {
/* But if the end is the maximum representable on the machine,
* just let the range that this would extend to have no end */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, offset);
}
return;
}
@@ -7345,16 +7344,18 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
len += 2; /* Includes an element each for the start and end of range */
- /* If overflows the existing space, extend, which may cause the array to be
- * moved */
+ /* If this will overflow the existing space, extend, which may cause the array to
+ * be moved */
if (max < len) {
invlist_extend(invlist, len);
- invlist_set_len(invlist, len); /* Have to set len here to avoid assert
- failure in invlist_array() */
+
+ /* Have to set len here to avoid assert failure in invlist_array() */
+ invlist_set_len(invlist, len, offset);
+
array = invlist_array(invlist);
}
else {
- invlist_set_len(invlist, len);
+ invlist_set_len(invlist, len, offset);
}
/* The next item on the list starts the range, the one after that is
@@ -7366,7 +7367,7 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, const UV start, const UV end
else {
/* But if the end is the maximum representable on the machine, just let
* the range have no end */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, offset);
}
}
@@ -7554,7 +7555,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
}
void
-Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
+Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
{
/* Take the union of two inversion lists and point <output> to it. *output
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7576,8 +7577,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
* return the larger of the input lists, but then outside code might need
* to keep track of whether to free the input list or not */
- UV* array_a; /* a's array */
- UV* array_b;
+ const UV* array_a; /* a's array */
+ const UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7645,23 +7646,17 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later, and clear the
- * flag as we don't have to do anything else later */
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
- array_b[0] = 0;
}
}
@@ -7757,7 +7752,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
/* Set result to final length, which can change the pointer to array_u, so
* re-find it */
if (len_u != _invlist_len(u)) {
- invlist_set_len(u, len_u);
+ invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
invlist_trim(u);
array_u = invlist_array(u);
}
@@ -7778,11 +7773,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
}
}
- /* If we've changed b, restore it */
- if (complement_b) {
- array_b[0] = 1;
- }
-
/* We may be removing a reference to one of the inputs */
if (a == *output || b == *output) {
assert(! invlist_is_iterating(*output));
@@ -7794,7 +7784,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
}
void
-Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
+Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
{
/* Take the intersection of two inversion lists and point <i> to it. *i
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7811,8 +7801,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* union above
*/
- UV* array_a; /* a's array */
- UV* array_b;
+ const UV* array_a; /* a's array */
+ const UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7877,23 +7867,17 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later, and clear the
- * flag as we don't have to do anything else later */
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
- array_b[0] = 0;
}
}
@@ -7984,7 +7968,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
/* Set result to final length, which can change the pointer to array_r, so
* re-find it */
if (len_r != _invlist_len(r)) {
- invlist_set_len(r, len_r);
+ invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
invlist_trim(r);
array_r = invlist_array(r);
}
@@ -8000,11 +7984,6 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
}
}
- /* If we've changed b, restore it */
- if (complement_b) {
- array_b[0] = 1;
- }
-
/* We may be removing a reference to one of the inputs */
if (a == *i || b == *i) {
assert(! invlist_is_iterating(*i));
@@ -8073,27 +8052,17 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
* have a zero; removes it otherwise. As described above, the data
* structure is set up so that this is very efficient */
- UV* len_pos = _get_invlist_len_addr(invlist);
-
PERL_ARGS_ASSERT__INVLIST_INVERT;
assert(! invlist_is_iterating(invlist));
/* The inverse of matching nothing is matching everything */
- if (*len_pos == 0) {
+ if (_invlist_len(invlist) == 0) {
_append_range_to_invlist(invlist, 0, UV_MAX);
return;
}
- /* The exclusive or complents 0 to 1; and 1 to 0. If the result is 1, the
- * zero element was a 0, so it is being removed, so the length decrements
- * by 1; and vice-versa. SvCUR is unaffected */
- if (*get_invlist_zero_addr(invlist) ^= 1) {
- (*len_pos)--;
- }
- else {
- (*len_pos)++;
- }
+ *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
}
void
@@ -8123,11 +8092,11 @@ Perl__invlist_invert_prop(pTHX_ SV* const invlist)
invlist_extend(invlist, len);
array = invlist_array(invlist);
}
- invlist_set_len(invlist, len);
+ invlist_set_len(invlist, len, *get_invlist_offset_addr(invlist));
array[len - 1] = PERL_UNICODE_MAX + 1;
}
else { /* Remove the 0x110000 */
- invlist_set_len(invlist, len - 1);
+ invlist_set_len(invlist, len - 1, *get_invlist_offset_addr(invlist));
}
}
@@ -8145,17 +8114,19 @@ S_invlist_clone(pTHX_ SV* const invlist)
/* Need to allocate extra space to accommodate Perl's addition of a
* trailing NUL to SvPV's, since it thinks they are always strings */
SV* new_invlist = _new_invlist(_invlist_len(invlist) + 1);
- STRLEN length = SvCUR(invlist);
+ STRLEN physical_length = SvCUR(invlist);
+ bool offset = *(get_invlist_offset_addr(invlist));
PERL_ARGS_ASSERT_INVLIST_CLONE;
- SvCUR_set(new_invlist, length); /* This isn't done automatically */
- Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
+ *(get_invlist_offset_addr(new_invlist)) = offset;
+ invlist_set_len(new_invlist, _invlist_len(invlist), offset);
+ Copy(SvPVX(invlist), SvPVX(new_invlist), physical_length, char);
return new_invlist;
}
-PERL_STATIC_INLINE UV*
+PERL_STATIC_INLINE STRLEN*
S_get_invlist_iter_addr(pTHX_ SV* invlist)
{
/* Return the address of the UV that contains the current iteration
@@ -8163,17 +8134,7 @@ S_get_invlist_iter_addr(pTHX_ SV* invlist)
PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;
- return (UV *) (SvPVX(invlist) + (INVLIST_ITER_OFFSET * sizeof (UV)));
-}
-
-PERL_STATIC_INLINE UV*
-S_get_invlist_version_id_addr(pTHX_ SV* invlist)
-{
- /* Return the address of the UV that contains the version id. */
-
- PERL_ARGS_ASSERT_GET_INVLIST_VERSION_ID_ADDR;
-
- return (UV *) (SvPVX(invlist) + (INVLIST_VERSION_ID_OFFSET * sizeof (UV)));
+ return &(((XINVLIST*) SvANY(invlist))->iterator);
}
PERL_STATIC_INLINE void
@@ -8197,7 +8158,7 @@ S_invlist_iterfinish(pTHX_ SV* invlist)
PERL_ARGS_ASSERT_INVLIST_ITERFINISH;
- *get_invlist_iter_addr(invlist) = UV_MAX;
+ *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
}
STATIC bool
@@ -8210,14 +8171,14 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
* <*start> and <*end> are unchanged, and the next call to this function
* will start over at the beginning of the list */
- UV* pos = get_invlist_iter_addr(invlist);
+ STRLEN* pos = get_invlist_iter_addr(invlist);
UV len = _invlist_len(invlist);
UV *array;
PERL_ARGS_ASSERT_INVLIST_ITERNEXT;
if (*pos >= len) {
- *pos = UV_MAX; /* Force iterinit() to be required next time */
+ *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */
return FALSE;
}
@@ -8240,7 +8201,7 @@ S_invlist_is_iterating(pTHX_ SV* const invlist)
{
PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
- return *(get_invlist_iter_addr(invlist)) < UV_MAX;
+ return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
}
PERL_STATIC_INLINE UV
@@ -8343,14 +8304,14 @@ Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
#if 0
bool
-S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
+S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
{
/* Return a boolean as to if the two passed in inversion lists are
* identical. The final argument, if TRUE, says to take the complement of
* the second inversion list before doing the comparison */
- UV* array_a = invlist_array(a);
- UV* array_b = invlist_array(b);
+ const UV* array_a = invlist_array(a);
+ const UV* array_b = invlist_array(b);
UV len_a = _invlist_len(a);
UV len_b = _invlist_len(b);
@@ -8372,20 +8333,15 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
/* Otherwise, to complement, we invert. Here, the first element is
* 0, just remove it. To do this, we just pretend the array starts
- * one later, and clear the flag as we don't have to do anything
- * else later */
+ * one later */
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
array_b[0] = 0;
@@ -8405,22 +8361,14 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
}
}
- if (complement_b) {
- array_b[0] = 1;
- }
return retval;
}
#endif
#undef HEADER_LENGTH
-#undef INVLIST_INITIAL_LENGTH
#undef TO_INTERNAL_SIZE
#undef FROM_INTERNAL_SIZE
-#undef INVLIST_LEN_OFFSET
-#undef INVLIST_ZERO_OFFSET
-#undef INVLIST_ITER_OFFSET
#undef INVLIST_VERSION_ID
-#undef INVLIST_PREVIOUS_INDEX_OFFSET
/* End of inversion list object */
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 67b6e417fc..b857b10580 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -15,7 +15,7 @@ require 'regen/regen_lib.pl';
# in the headers is used to minimize the possibility of things getting
# out-of-sync, or the wrong data structure being passed. Currently that
# random number is:
-my $VERSION_DATA_STRUCTURE_TYPE = 290655244;
+my $VERSION_DATA_STRUCTURE_TYPE = 148565664;
my $out_fh = open_new('charclass_invlists.h', '>',
{style => '*', by => $0,
@@ -36,37 +36,22 @@ sub output_invlist ($$) {
# Output the inversion list $invlist using the name $name for it.
# It is output in the exact internal form for inversion lists.
- my $zero_or_one; # Is the last element of the header 0, or 1 ?
-
- # If the first element is 0, it goes in the header, instead of the body
- if ($invlist->[0] == 0) {
- shift @$invlist;
-
- $zero_or_one = 0;
-
- # Add a dummy 0 at the end so that the length is constant. inversion
- # lists are always stored with enough room so that if they change from
- # beginning with 0, they don't have to grow.
- push @$invlist, 0;
- }
- else {
+ # Is the last element of the header 0, or 1 ?
+ my $zero_or_one = 0;
+ if ($invlist->[0] != 0) {
+ unshift @$invlist, 0;
$zero_or_one = 1;
}
+ my $count = @$invlist;
print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name};
- print $out_fh "\nstatic UV ${name}_invlist[] = {\n";
-
- print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
+ print $out_fh "\nstatic const UV ${name}_invlist[] = {\n";
- # This should be UV_MAX, but I (khw) am not confident that the suffixes
- # for specifying the constant are portable, e.g. 'ull' on a 32 bit
- # machine that is configured to use 64 bits; might need a Configure probe
- print $out_fh "\t0,\t/* Current iteration position */\n";
- print $out_fh "\t0,\t/* Cache of previous search index result */\n";
+ print $out_fh "\t$count,\t/* Number of elements */\n";
print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n";
print $out_fh "\t", $zero_or_one,
- ",\t/* 0 if this is the first element of the list proper;",
- "\n\t\t 1 if the next element is the first */\n";
+ ",\t/* 0 if the list starts at 0;",
+ "\n\t\t 1 if it starts at the element beyond 0 */\n";
# The main body are the UVs passed in to this routine. Do the final
# element separately
diff --git a/sv.c b/sv.c
index e5f60a2c39..3977204781 100644
--- a/sv.c
+++ b/sv.c
@@ -881,11 +881,6 @@ static const struct body_details bodies_by_type[] = {
/* HEs use this offset for their arena. */
{ 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },
- /* The bind placeholder pretends to be an RV for now.
- Also it's marked as "can't upgrade" to stop anyone using it before it's
- implemented. */
- { 0, 0, 0, SVt_DUMMY, TRUE, NONV, NOARENA, 0 },
-
/* IVs are in the head, so the allocation size is 0. */
{ 0,
sizeof(IV), /* This is used to copy out the IV body. */
@@ -903,6 +898,12 @@ static const struct body_details bodies_by_type[] = {
SVt_PV, FALSE, NONV, HASARENA,
FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },
+ { sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
+ copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
+ + STRUCT_OFFSET(XPV, xpv_cur),
+ SVt_INVLIST, TRUE, NONV, HASARENA,
+ FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },
+
{ sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
+ STRUCT_OFFSET(XPV, xpv_cur),
@@ -1340,6 +1341,7 @@ Perl_sv_upgrade(pTHX_ SV *const sv, svtype new_type)
case SVt_PVGV:
case SVt_PVCV:
case SVt_PVLV:
+ case SVt_INVLIST:
case SVt_REGEXP:
case SVt_PVMG:
case SVt_PVNV:
@@ -4127,7 +4129,7 @@ Perl_sv_setsv_flags(pTHX_ SV *dstr, SV* sstr, const I32 flags)
}
break;
- /* case SVt_DUMMY: */
+ case SVt_INVLIST:
case SVt_PVLV:
case SVt_PVGV:
case SVt_PVMG:
@@ -6212,7 +6214,7 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
SvREFCNT_dec(SvSTASH(sv));
}
switch (type) {
- /* case SVt_DUMMY: */
+ /* case SVt_INVLIST: */
case SVt_PVIO:
if (IoIFP(sv) &&
IoIFP(sv) != PerlIO_stdin() &&
@@ -6338,6 +6340,7 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
case SVt_PVMG:
case SVt_PVNV:
case SVt_PVIV:
+ case SVt_INVLIST:
case SVt_PV:
freescalar:
/* Don't bother with SvOOK_off(sv); as we're only going to
@@ -9474,7 +9477,7 @@ Perl_sv_reftype(pTHX_ const SV *const sv, const int ob)
? "GLOB" : "SCALAR");
case SVt_PVFM: return "FORMAT";
case SVt_PVIO: return "IO";
- case SVt_DUMMY: return "DUMMY";
+ case SVt_INVLIST: return "INVLIST";
case SVt_REGEXP: return "REGEXP";
default: return "UNKNOWN";
}
@@ -12217,7 +12220,6 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
SvANY(dstr) = new_XNV();
SvNV_set(dstr, SvNVX(sstr));
break;
- /* case SVt_DUMMY: */
default:
{
/* These are all the types that need complex bodies allocating. */
@@ -12242,6 +12244,7 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
case SVt_PVMG:
case SVt_PVNV:
case SVt_PVIV:
+ case SVt_INVLIST:
case SVt_PV:
assert(sv_type_details->body_size);
if (sv_type_details->arena) {
diff --git a/sv.h b/sv.h
index 1f9145f6bc..7110b4c13d 100644
--- a/sv.h
+++ b/sv.h
@@ -29,6 +29,7 @@ The types are:
SVt_PVIV
SVt_PVNV
SVt_PVMG
+ SVt_INVLIST
SVt_REGEXP
SVt_PVGV
SVt_PVLV
@@ -56,7 +57,8 @@ typeglob has been assigned. Assigning to it again will stop it from being
a typeglob. SVt_PVLV represents a scalar that delegates to another scalar
behind the scenes. It is used, e.g., for the return value of C<substr> and
for tied hash and array elements. It can hold any scalar value, including
-a typeglob. SVt_REGEXP is for regular expressions.
+a typeglob. SVt_REGEXP is for regular expressions. SVt_INVLIST is for Perl
+core internal use only.
SVt_PVMG represents a "normal" scalar (not a typeglob, regular expression,
or delegate). Since most scalars do not need all the internal fields of a
@@ -90,6 +92,9 @@ Type flag for scalars. See L</svtype>.
=for apidoc AmU||SVt_PVMG
Type flag for scalars. See L</svtype>.
+=for apidoc AmU||SVt_INVLIST
+Type flag for inversion lists, which are for Perl core internal use only. See L</svtype>.
+
=for apidoc AmU||SVt_REGEXP
Type flag for regular expressions. See L</svtype>.
@@ -119,11 +124,12 @@ Type flag for I/O objects. See L</svtype>.
typedef enum {
SVt_NULL, /* 0 */
- SVt_DUMMY, /* 1 */
- SVt_IV, /* 2 */
- SVt_NV, /* 3 */
+ /* BIND was here, before INVLIST replaced it. */
+ SVt_IV, /* 1 */
+ SVt_NV, /* 2 */
/* RV was here, before it was merged with IV. */
- SVt_PV, /* 4 */
+ SVt_PV, /* 3 */
+ SVt_INVLIST, /* 4, implemented as a PV */
SVt_PVIV, /* 5 */
SVt_PVNV, /* 6 */
SVt_PVMG, /* 7 */
@@ -140,7 +146,9 @@ typedef enum {
} svtype;
/* *** any alterations to the SV types above need to be reflected in
- * SVt_MASK and the various PL_valid_types_* tables */
+ * SVt_MASK and the various PL_valid_types_* tables. As of this writing those
+ * tables are in perl.h. There are also two affected names tables in dump.c,
+ * one in B.xs, and 'bodies_by_type[]' in sv.c */
#define SVt_MASK 0xf /* smallest bitmask that covers all types */
@@ -520,6 +528,13 @@ struct xpvlv {
char xlv_flags; /* 1 = negative offset 2 = negative len */
};
+struct xpvinvlist { /* inversion list body: the common PV head followed by list bookkeeping */
+ _XPV_HEAD;
+ IV prev_index; /* presumably the cached index result of the previous search -- TODO confirm in regcomp.c */
+ STRLEN iterator; /* presumably the current iteration position, compared against (STRLEN) UV_MAX by invlist_is_iterating -- TODO confirm */
+ bool is_offset; /* presumably TRUE when the list proper starts one element beyond the stored leading 0 -- TODO confirm in regcomp.c */
+};
+
/* This structure works in 3 ways - regular scalar, GV with GP, or fast
Boyer-Moore. */
struct xpvgv {