summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- charclass_invlists.h 238
-rw-r--r-- embed.fnc 10
-rw-r--r-- inline_invlist.c 21
-rw-r--r-- proto.h 4
-rw-r--r-- regcomp.c 124
-rw-r--r-- regen/mk_invlists.pl 32
6 files changed, 213 insertions, 216 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index b5d71afb16..57fbe47af1 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -13,9 +13,10 @@ static UV Latin1_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
256,
0
};
@@ -28,9 +29,10 @@ static UV AboveLatin1_invlist[] = {
1, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
256
};
@@ -42,9 +44,10 @@ static UV ASCII_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
128,
0
};
@@ -57,9 +60,10 @@ static UV L1Cased_invlist[] = {
16, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -86,9 +90,10 @@ static UV VertSpace_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
10,
14,
133,
@@ -105,9 +110,10 @@ static UV PerlSpace_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -122,9 +128,10 @@ static UV XPerlSpace_invlist[] = {
22, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -157,9 +164,10 @@ static UV PosixAlnum_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -176,9 +184,10 @@ static UV L1PosixAlnum_invlist[] = {
18, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -207,9 +216,10 @@ static UV PosixAlpha_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -224,9 +234,10 @@ static UV L1PosixAlpha_invlist[] = {
16, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
97,
@@ -253,9 +264,10 @@ static UV PosixBlank_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
10,
32,
@@ -270,9 +282,10 @@ static UV XPosixBlank_invlist[] = {
18, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
10,
32,
@@ -301,9 +314,10 @@ static UV PosixCntrl_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
128,
@@ -318,9 +332,10 @@ static UV XPosixCntrl_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 0, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 0, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
160,
@@ -335,9 +350,10 @@ static UV PosixDigit_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58
};
@@ -350,9 +366,10 @@ static UV PosixGraph_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
127
};
@@ -365,9 +382,10 @@ static UV L1PosixGraph_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
127,
161,
@@ -382,9 +400,10 @@ static UV PosixLower_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
97,
123
};
@@ -397,9 +416,10 @@ static UV L1PosixLower_invlist[] = {
12, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
97,
123,
170,
@@ -422,9 +442,10 @@ static UV PosixPrint_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127
};
@@ -437,9 +458,10 @@ static UV L1PosixPrint_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
32,
127,
160,
@@ -454,9 +476,10 @@ static UV PosixPunct_invlist[] = {
8, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
48,
58,
@@ -475,9 +498,10 @@ static UV L1PosixPunct_invlist[] = {
20, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
33,
48,
58,
@@ -508,9 +532,10 @@ static UV PosixSpace_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -525,9 +550,10 @@ static UV XPosixSpace_invlist[] = {
22, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
9,
14,
32,
@@ -560,9 +586,10 @@ static UV PosixUpper_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91
};
@@ -575,9 +602,10 @@ static UV L1PosixUpper_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
65,
91,
192,
@@ -594,9 +622,10 @@ static UV PosixWord_invlist[] = {
8, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -615,9 +644,10 @@ static UV L1PosixWord_invlist[] = {
20, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -648,9 +678,10 @@ static UV PosixXDigit_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -667,9 +698,10 @@ static UV XPosixXDigit_invlist[] = {
12, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
48,
58,
65,
@@ -690,9 +722,10 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = {
44, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
700,
701,
776,
@@ -745,9 +778,10 @@ static UV _Perl_Multi_Char_Folds_invlist[] = {
58, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 290655244, /* Version and data structure type */
- 1, /* 0 if this is the first element of the list proper;
- 1 if the next element is the first */
+ 1039476070, /* Version and data structure type */
+ 1, /* 0 if the list starts at 0;
+ 1 if it starts at the element beyond 0 */
+ 0,
223,
224,
304,
diff --git a/embed.fnc b/embed.fnc
index bd78b71f74..0b8122492c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1089,7 +1089,7 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \
|NULLOK SV **listsvp|NULLOK SV **altsvp
#ifdef PERL_IN_REGCOMP_C
EMsR |SV* |_new_invlist_C_array|NN UV* list
-: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|bool complement_b
+: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b
#endif
Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
@@ -1452,9 +1452,13 @@ EiMR |UV |invlist_highest|NN SV* const invlist
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
-EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
+EXpM |void |_invlist_intersection_maybe_complement_2nd \
+ |NULLOK SV* const a|NN SV* const b \
+ |const bool complement_b|NN SV** i
EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
-EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
+EXpM |void |_invlist_union_maybe_complement_2nd \
+ |NULLOK SV* const a|NN SV* const b \
+ |const bool complement_b|NN SV** output
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXpM |void |_invlist_invert_prop|NN SV* const invlist
diff --git a/inline_invlist.c b/inline_invlist.c
index b56ce60002..b194c0d264 100644
--- a/inline_invlist.c
+++ b/inline_invlist.c
@@ -20,20 +20,21 @@
* insert that at this location. Then, if an auxiliary program doesn't change
* correspondingly, it will be discovered immediately */
#define INVLIST_VERSION_ID_OFFSET 3
-#define INVLIST_VERSION_ID 290655244
+#define INVLIST_VERSION_ID 1039476070
+
+#define INVLIST_ZERO_OFFSET 4 /* 0 or 1 */
+/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
+ * contains the code point U+0000, and begins at element [0] in the array,
+ * which always contains 0. If 1, the inversion list doesn't contain U+0000,
+ * and it begins at element [1]. Inverting an inversion list consists of
+ * adding or removing the 0 at the beginning of it. By reserving a space for
+ * that 0, inversion can be made very fast: we just flip this UV */
/* For safety, when adding new elements, remember to #undef them at the end of
* the inversion list code section */
-#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */
-/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
- * contains the code point U+00000, and begins here. If 1, the inversion list
- * doesn't contain U+0000, and it begins at the next UV in the array.
- * Inverting an inversion list consists of adding or removing the 0 at the
- * beginning of it. By reserving a space for that 0, inversion can be made
- * very fast */
-
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
+#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 2) /* includes 1 for the constant
+ 0 element */
/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
* etc */
diff --git a/proto.h b/proto.h
index 5c21bf7710..52757e3882 100644
--- a/proto.h
+++ b/proto.h
@@ -6862,7 +6862,7 @@ PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start,
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
+PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND \
@@ -6893,7 +6893,7 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
+PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
diff --git a/regcomp.c b/regcomp.c
index e150654ba1..6bd9084209 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7047,10 +7047,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
* list.)
* Taking the complement (inverting) an inversion list is quite simple, if the
* first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element at the beginning of each inversion
- * list to contain 0 when the list contains 0, and contains 1 otherwise. The
- * actual beginning of the list is either that element if 0, or the next one if
- * 1.
+ * This implementation reserves an element (considered to be the final element
+ * of the header) at the beginning of each inversion list to always contain 0;
+ * there is an additional flag in the header which indicates if the list begins
+ * at the 0, or is offset to begin at the next element.
*
* More about inversion lists can be found in "Unicode Demystified"
* Chapter 13 by Richard Gillam, published by Addison-Wesley.
@@ -7075,11 +7075,11 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
{
/* Returns a pointer to the first element in the inversion list's array.
* This is called upon initialization of an inversion list. Where the
- * array begins depends on whether the list has the code point U+0000
- * in it or not. The other parameter tells it whether the code that
- * follows this call is about to put a 0 in the inversion list or not.
- * The first element is either the element with 0, if 0, or the next one,
- * if 1 */
+ * array begins depends on whether the list has the code point U+0000 in it
+ * or not. The other parameter tells it whether the code that follows this
+ * call is about to put a 0 in the inversion list or not. The first
+ * element is either the final part of the header reserved for 0, if TRUE,
+ * or the first element of the non-heading part, if FALSE */
UV* zero = get_invlist_zero_addr(invlist);
@@ -7090,7 +7090,8 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
/* 1^1 = 0; 1^0 = 1 */
*zero = 1 ^ will_have_0;
- return zero + *zero;
+ *(zero + 1) = 0;
+ return 1 + zero + *zero;
}
PERL_STATIC_INLINE UV*
@@ -7108,10 +7109,12 @@ S_invlist_array(pTHX_ SV* const invlist)
assert(*get_invlist_zero_addr(invlist) == 0
|| *get_invlist_zero_addr(invlist) == 1);
- /* The array begins either at the element reserved for zero if the
- * list contains 0 (that element will be set to 0), or otherwise the next
- * element (in which case the reserved element will be set to 1). */
- return (UV *) (get_invlist_zero_addr(invlist)
+ /* The array begins either at the header element reserved for zero or the
+ * element after that. The reserved element is 1 past the zero_addr
+ * element; the latter contains 0 or 1 to indicate how much additionally to
+ * add */
+ assert(0 == *(1 + get_invlist_zero_addr(invlist)));
+ return (UV *) (1 + get_invlist_zero_addr(invlist)
+ *get_invlist_zero_addr(invlist));
}
@@ -7127,19 +7130,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
assert(len <= SvLEN(invlist));
SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
- /* If the list contains U+0000, that element is part of the header,
- * and should not be counted as part of the array. It will contain
- * 0 in that case, and 1 otherwise. So we could flop 0=>1, 1=>0 and
- * subtract:
- * SvCUR_set(invlist,
- * TO_INTERNAL_SIZE(len
- * - (*get_invlist_zero_addr(inv_list) ^ 1)));
- * But, this is only valid if len is not 0. The consequences of not doing
- * this is that the memory allocation code may think that 1 more UV is
- * being used than actually is, and so might do an unnecessary grow. That
- * seems worth not bothering to make this the precise amount.
- *
- * Note that when inverting, SvCUR shouldn't change */
+ /* Note that when inverting, SvCUR shouldn't change */
}
PERL_STATIC_INLINE IV*
@@ -7191,10 +7182,8 @@ S_invlist_max(pTHX_ SV* const invlist)
PERL_STATIC_INLINE UV*
S_get_invlist_zero_addr(pTHX_ SV* invlist)
{
- /* Return the address of the UV that is reserved to hold 0 if the inversion
- * list contains 0. This has to be the last element of the heading, as the
- * list proper starts with either it if 0, or the next element if not.
- * (But we force it to contain either 0 or 1) */
+ /* Return the address of the UV that says whether the inversion list is
+ * offset (it contains 1) or not (contains 0) */
PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR;
@@ -7211,6 +7200,7 @@ Perl__new_invlist(pTHX_ IV initial_size)
* system default is used instead */
SV* new_list;
+ UV* zero_addr;
if (initial_size < 0) {
initial_size = INVLIST_INITIAL_LEN;
@@ -7225,11 +7215,13 @@ Perl__new_invlist(pTHX_ IV initial_size)
/* This should force a segfault if a method doesn't initialize this
* properly */
- *get_invlist_zero_addr(new_list) = UV_MAX;
+ zero_addr = get_invlist_zero_addr(new_list);
+ *zero_addr = UV_MAX;
+ *(zero_addr + 1) = 0;
*get_invlist_previous_index_addr(new_list) = 0;
*get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 5
+#if HEADER_LENGTH != 6
# error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
#endif
@@ -7554,7 +7546,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
}
void
-Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
+Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
{
/* Take the union of two inversion lists and point <output> to it. *output
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7576,8 +7568,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
* return the larger of the input lists, but then outside code might need
* to keep track of whether to free the input list or not */
- UV* array_a; /* a's array */
- UV* array_b;
+ const UV* array_a; /* a's array */
+ const UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7645,23 +7637,17 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later, and clear the
- * flag as we don't have to do anything else later */
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
- array_b[0] = 0;
}
}
@@ -7778,11 +7764,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
}
}
- /* If we've changed b, restore it */
- if (complement_b) {
- array_b[0] = 1;
- }
-
/* We may be removing a reference to one of the inputs */
if (a == *output || b == *output) {
assert(! invlist_is_iterating(*output));
@@ -7794,7 +7775,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co
}
void
-Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
+Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
{
/* Take the intersection of two inversion lists and point <i> to it. *i
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7811,8 +7792,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* union above
*/
- UV* array_a; /* a's array */
- UV* array_b;
+ const UV* array_a; /* a's array */
+ const UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7877,23 +7858,17 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later, and clear the
- * flag as we don't have to do anything else later */
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
- array_b[0] = 0;
}
}
@@ -8000,11 +7975,6 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
}
}
- /* If we've changed b, restore it */
- if (complement_b) {
- array_b[0] = 1;
- }
-
/* We may be removing a reference to one of the inputs */
if (a == *i || b == *i) {
assert(! invlist_is_iterating(*i));
@@ -8343,14 +8313,14 @@ Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
#if 0
bool
-S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
+S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
{
/* Return a boolean as to if the two passed in inversion lists are
* identical. The final argument, if TRUE, says to take the complement of
* the second inversion list before doing the comparison */
- UV* array_a = invlist_array(a);
- UV* array_b = invlist_array(b);
+ const UV* array_a = invlist_array(a);
+ const UV* array_b = invlist_array(b);
UV len_a = _invlist_len(a);
UV len_b = _invlist_len(b);
@@ -8372,20 +8342,15 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
/* Otherwise, to complement, we invert. Here, the first element is
* 0, just remove it. To do this, we just pretend the array starts
- * one later, and clear the flag as we don't have to do anything
- * else later */
+ * one later */
array_b++;
len_b--;
- complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we unshift a 0 before the
- * array. The data structure reserves a space for that 0 (which
- * should be a '1' right now), so physical shifting is unneeded,
- * but temporarily change that element to 0. Before exiting the
- * routine, we must restore the element to '1' */
+ /* But if the first element is not zero, we pretend the list starts
+ * at the 0 that is always stored immediately before the array. */
array_b--;
len_b++;
array_b[0] = 0;
@@ -8405,9 +8370,6 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
}
}
- if (complement_b) {
- array_b[0] = 1;
- }
return retval;
}
#endif
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 67b6e417fc..8e131d6f58 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -15,7 +15,7 @@ require 'regen/regen_lib.pl';
# in the headers is used to minimize the possibility of things getting
# out-of-sync, or the wrong data structure being passed. Currently that
# random number is:
-my $VERSION_DATA_STRUCTURE_TYPE = 290655244;
+my $VERSION_DATA_STRUCTURE_TYPE = 1039476070;
my $out_fh = open_new('charclass_invlists.h', '>',
{style => '*', by => $0,
@@ -36,27 +36,23 @@ sub output_invlist ($$) {
# Output the inversion list $invlist using the name $name for it.
# It is output in the exact internal form for inversion lists.
- my $zero_or_one; # Is the last element of the header 0, or 1 ?
-
- # If the first element is 0, it goes in the header, instead of the body
- if ($invlist->[0] == 0) {
- shift @$invlist;
-
- $zero_or_one = 0;
-
- # Add a dummy 0 at the end so that the length is constant. inversion
- # lists are always stored with enough room so that if they change from
- # beginning with 0, they don't have to grow.
- push @$invlist, 0;
- }
- else {
+ # Is the last element of the header 0, or 1 ?
+ my $zero_or_one = 0;
+ my $count = @$invlist;
+ if ($invlist->[0] != 0) {
+ unshift @$invlist, 0;
$zero_or_one = 1;
}
+ else { # Temporary until we disentangle the header from the body. In the
+ # meantime, adding this 0 at the end of lists that begin with 0
+ # will keep the code from reading beyond the end of the array.
+ push @$invlist, 0;
+ }
print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name};
print $out_fh "\nstatic UV ${name}_invlist[] = {\n";
- print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
+ print $out_fh "\t$count,\t/* Number of elements */\n";
# This should be UV_MAX, but I (khw) am not confident that the suffixes
# for specifying the constant are portable, e.g. 'ull' on a 32 bit
@@ -65,8 +61,8 @@ sub output_invlist ($$) {
print $out_fh "\t0,\t/* Cache of previous search index result */\n";
print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n";
print $out_fh "\t", $zero_or_one,
- ",\t/* 0 if this is the first element of the list proper;",
- "\n\t\t 1 if the next element is the first */\n";
+ ",\t/* 0 if the list starts at 0;",
+ "\n\t\t 1 if it starts at the element beyond 0 */\n";
# The main body are the UVs passed in to this routine. Do the final
# element separately