summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-07-04 22:01:05 -0600
committerKarl Williamson <public@khwilliamson.com>2013-07-04 22:44:52 -0600
commitde353015643cf10b437d714d3483c1209e079916 (patch)
treebf756d015c7d5c70451fc1f071c0c79ef3977565
parentac7a6f5849b5fd57b2e837f4ddbc18c992610e9c (diff)
downloadperl-de353015643cf10b437d714d3483c1209e079916.tar.gz
Revert "regcomp.c: Add a constant 0 element before inversion lists"
This reverts commit 533c4e2f08b42d977e5004e823d4849f7473d2d0. This continues the backing out of this topic branch. A bisect shows that the first commit exhibiting an error is the first one in the branch.
-rw-r--r--charclass_invlists.h250
-rw-r--r--embed.fnc10
-rw-r--r--inline_invlist.c21
-rw-r--r--proto.h4
-rw-r--r--regcomp.c124
-rw-r--r--regen/mk_invlists.pl27
6 files changed, 224 insertions, 212 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 690526dd6e..b5d71afb16 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -13,11 +13,11 @@ static UV Latin1_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 0, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
- 256
+ 290655244, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 256,
+ 0
};
#endif
@@ -28,10 +28,9 @@ static UV AboveLatin1_invlist[] = {
1, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
256
};
@@ -43,11 +42,11 @@ static UV ASCII_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 0, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
- 128
+ 290655244, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 128,
+ 0
};
#endif
@@ -58,10 +57,9 @@ static UV L1Cased_invlist[] = {
16, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
65,
91,
97,
@@ -88,10 +86,9 @@ static UV VertSpace_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
10,
14,
133,
@@ -108,10 +105,9 @@ static UV PerlSpace_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
14,
32,
@@ -126,10 +122,9 @@ static UV XPerlSpace_invlist[] = {
22, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
14,
32,
@@ -162,10 +157,9 @@ static UV PosixAlnum_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -182,10 +176,9 @@ static UV L1PosixAlnum_invlist[] = {
18, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -214,10 +207,9 @@ static UV PosixAlpha_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
65,
91,
97,
@@ -232,10 +224,9 @@ static UV L1PosixAlpha_invlist[] = {
16, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
65,
91,
97,
@@ -262,10 +253,9 @@ static UV PosixBlank_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
10,
32,
@@ -280,10 +270,9 @@ static UV XPosixBlank_invlist[] = {
18, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
10,
32,
@@ -312,13 +301,13 @@ static UV PosixCntrl_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 0, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
32,
127,
- 128
+ 128,
+ 0
};
#endif
@@ -329,13 +318,13 @@ static UV XPosixCntrl_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 0, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
32,
127,
- 160
+ 160,
+ 0
};
#endif
@@ -346,10 +335,9 @@ static UV PosixDigit_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58
};
@@ -362,10 +350,9 @@ static UV PosixGraph_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
33,
127
};
@@ -378,10 +365,9 @@ static UV L1PosixGraph_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
33,
127,
161,
@@ -396,10 +382,9 @@ static UV PosixLower_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
97,
123
};
@@ -412,10 +397,9 @@ static UV L1PosixLower_invlist[] = {
12, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
97,
123,
170,
@@ -438,10 +422,9 @@ static UV PosixPrint_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
32,
127
};
@@ -454,10 +437,9 @@ static UV L1PosixPrint_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
32,
127,
160,
@@ -472,10 +454,9 @@ static UV PosixPunct_invlist[] = {
8, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
33,
48,
58,
@@ -494,10 +475,9 @@ static UV L1PosixPunct_invlist[] = {
20, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
33,
48,
58,
@@ -528,10 +508,9 @@ static UV PosixSpace_invlist[] = {
4, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
14,
32,
@@ -546,10 +525,9 @@ static UV XPosixSpace_invlist[] = {
22, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
9,
14,
32,
@@ -582,10 +560,9 @@ static UV PosixUpper_invlist[] = {
2, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
65,
91
};
@@ -598,10 +575,9 @@ static UV L1PosixUpper_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
65,
91,
192,
@@ -618,10 +594,9 @@ static UV PosixWord_invlist[] = {
8, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -640,10 +615,9 @@ static UV L1PosixWord_invlist[] = {
20, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -674,10 +648,9 @@ static UV PosixXDigit_invlist[] = {
6, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -694,10 +667,9 @@ static UV XPosixXDigit_invlist[] = {
12, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
48,
58,
65,
@@ -718,10 +690,9 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = {
44, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
700,
701,
776,
@@ -774,10 +745,9 @@ static UV _Perl_Multi_Char_Folds_invlist[] = {
58, /* Number of elements */
0, /* Current iteration position */
0, /* Cache of previous search index result */
- 1039476070, /* Version and data structure type */
- 1, /* 0 if the list starts at 0;
- 1 if it starts at the element beyond 0 */
- 0,
+ 290655244, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
223,
224,
304,
diff --git a/embed.fnc b/embed.fnc
index 8ad9a13aeb..5c27b27184 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1085,7 +1085,7 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \
|NULLOK SV **listsvp|NULLOK SV **altsvp
#ifdef PERL_IN_REGCOMP_C
EMsR |SV* |_new_invlist_C_array|NN UV* list
-: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b
+: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|bool complement_b
#endif
Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
@@ -1446,13 +1446,9 @@ EiMR |UV |invlist_highest|NN SV* const invlist
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
-EXpM |void |_invlist_intersection_maybe_complement_2nd \
- |NULLOK SV* const a|NN SV* const b \
- |const bool complement_b|NN SV** i
+EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
-EXpM |void |_invlist_union_maybe_complement_2nd \
- |NULLOK SV* const a|NN SV* const b \
- |const bool complement_b|NN SV** output
+EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXpM |void |_invlist_invert_prop|NN SV* const invlist
diff --git a/inline_invlist.c b/inline_invlist.c
index b194c0d264..b56ce60002 100644
--- a/inline_invlist.c
+++ b/inline_invlist.c
@@ -20,21 +20,20 @@
* insert that at this location. Then, if an auxiliary program doesn't change
* correspondingly, it will be discovered immediately */
#define INVLIST_VERSION_ID_OFFSET 3
-#define INVLIST_VERSION_ID 1039476070
-
-#define INVLIST_ZERO_OFFSET 4 /* 0 or 1 */
-/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
- * contains the code point U+00000, and begins at element [0] in the array,
- * which always contains 0. If 1, the inversion list doesn't contain U+0000,
- * and it begins at element [1]. Inverting an inversion list consists of
- * adding or removing the 0 at the beginning of it. By reserving a space for
- * that 0, inversion can be made very fast: we just flip this UV */
+#define INVLIST_VERSION_ID 290655244
/* For safety, when adding new elements, remember to #undef them at the end of
* the inversion list code section */
-#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 2) /* includes 1 for the constant
- 0 element */
+#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */
+/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list
+ * contains the code point U+00000, and begins here. If 1, the inversion list
+ * doesn't contain U+0000, and it begins at the next UV in the array.
+ * Inverting an inversion list consists of adding or removing the 0 at the
+ * beginning of it. By reserving a space for that 0, inversion can be made
+ * very fast */
+
+#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1)
/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
* etc */
diff --git a/proto.h b/proto.h
index f5dc0dceeb..9a06590ab0 100644
--- a/proto.h
+++ b/proto.h
@@ -6839,7 +6839,7 @@ PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start,
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
+PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND \
@@ -6870,7 +6870,7 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
+PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_4);
#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
diff --git a/regcomp.c b/regcomp.c
index b2bf63c117..4885c0b5a6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7047,10 +7047,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
* list.)
* Taking the complement (inverting) an inversion list is quite simple, if the
* first element is 0, remove it; otherwise add a 0 element at the beginning.
- * This implementation reserves an element (considered to be the final element
- * of the header) at the beginning of each inversion list to always contain 0;
- * there is an additional flag in the header which indicates if the list begins
- * at the 0, or is offset to begin at the next element.
+ * This implementation reserves an element at the beginning of each inversion
+ * list to contain 0 when the list contains 0, and contains 1 otherwise. The
+ * actual beginning of the list is either that element if 0, or the next one if
+ * 1.
*
* More about inversion lists can be found in "Unicode Demystified"
* Chapter 13 by Richard Gillam, published by Addison-Wesley.
@@ -7075,11 +7075,11 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
{
/* Returns a pointer to the first element in the inversion list's array.
* This is called upon initialization of an inversion list. Where the
- * array begins depends on whether the list has the code point U+0000 in it
- * or not. The other parameter tells it whether the code that follows this
- * call is about to put a 0 in the inversion list or not. The first
- * element is either the final part of the header reserved for 0, if TRUE,
- * or the first element of the non-heading part, if FALSE */
+ * array begins depends on whether the list has the code point U+0000
+ * in it or not. The other parameter tells it whether the code that
+ * follows this call is about to put a 0 in the inversion list or not.
+ * The first element is either the element with 0, if 0, or the next one,
+ * if 1 */
UV* zero = get_invlist_zero_addr(invlist);
@@ -7090,8 +7090,7 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0)
/* 1^1 = 0; 1^0 = 1 */
*zero = 1 ^ will_have_0;
- *(zero + 1) = 0;
- return 1 + zero + *zero;
+ return zero + *zero;
}
PERL_STATIC_INLINE UV*
@@ -7109,12 +7108,10 @@ S_invlist_array(pTHX_ SV* const invlist)
assert(*get_invlist_zero_addr(invlist) == 0
|| *get_invlist_zero_addr(invlist) == 1);
- /* The array begins either at the header element reserved for zero or the
- * element after that. The reserved element is 1 past the zero_addr
- * element; the latter contains 0 or 1 to indicate how much additionally to
- * add */
- assert(0 == *(1 + get_invlist_zero_addr(invlist)));
- return (UV *) (1 + get_invlist_zero_addr(invlist)
+ /* The array begins either at the element reserved for zero if the
+ * list contains 0 (that element will be set to 0), or otherwise the next
+ * element (in which case the reserved element will be set to 1). */
+ return (UV *) (get_invlist_zero_addr(invlist)
+ *get_invlist_zero_addr(invlist));
}
@@ -7130,7 +7127,19 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
assert(len <= SvLEN(invlist));
SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
- /* Note that when inverting, SvCUR shouldn't change */
+ /* If the list contains U+0000, that element is part of the header,
+ * and should not be counted as part of the array. It will contain
+ * 0 in that case, and 1 otherwise. So we could flop 0=>1, 1=>0 and
+ * subtract:
+ * SvCUR_set(invlist,
+ * TO_INTERNAL_SIZE(len
+ * - (*get_invlist_zero_addr(inv_list) ^ 1)));
+ * But, this is only valid if len is not 0. The consequences of not doing
+ * this is that the memory allocation code may think that 1 more UV is
+ * being used than actually is, and so might do an unnecessary grow. That
+ * seems worth not bothering to make this the precise amount.
+ *
+ * Note that when inverting, SvCUR shouldn't change */
}
PERL_STATIC_INLINE IV*
@@ -7182,8 +7191,10 @@ S_invlist_max(pTHX_ SV* const invlist)
PERL_STATIC_INLINE UV*
S_get_invlist_zero_addr(pTHX_ SV* invlist)
{
- /* Return the address of the UV that says whether the inversion list is
- * offset (it contains 1) or not (contains 0) */
+ /* Return the address of the UV that is reserved to hold 0 if the inversion
+ * list contains 0. This has to be the last element of the heading, as the
+ * list proper starts with either it if 0, or the next element if not.
+ * (But we force it to contain either 0 or 1) */
PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR;
@@ -7200,7 +7211,6 @@ Perl__new_invlist(pTHX_ IV initial_size)
* system default is used instead */
SV* new_list;
- UV* zero_addr;
if (initial_size < 0) {
initial_size = INVLIST_INITIAL_LEN;
@@ -7215,13 +7225,11 @@ Perl__new_invlist(pTHX_ IV initial_size)
/* This should force a segfault if a method doesn't initialize this
* properly */
- zero_addr = get_invlist_zero_addr(new_list);
- *zero_addr = UV_MAX;
- *(zero_addr + 1) = 0;
+ *get_invlist_zero_addr(new_list) = UV_MAX;
*get_invlist_previous_index_addr(new_list) = 0;
*get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID;
-#if HEADER_LENGTH != 6
+#if HEADER_LENGTH != 5
# error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length
#endif
@@ -7546,7 +7554,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
}
void
-Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output)
+Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
{
/* Take the union of two inversion lists and point <output> to it. *output
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7568,8 +7576,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b
* return the larger of the input lists, but then outside code might need
* to keep track of whether to free the input list or not */
- const UV* array_a; /* a's array */
- const UV* array_b;
+ UV* array_a; /* a's array */
+ UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7637,17 +7645,23 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later */
+ * do this, we just pretend the array starts one later, and clear the
+ * flag as we don't have to do anything else later */
if (array_b[0] == 0) {
array_b++;
len_b--;
+ complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we pretend the list starts
- * at the 0 that is always stored immediately before the array. */
+ /* But if the first element is not zero, we unshift a 0 before the
+ * array. The data structure reserves a space for that 0 (which
+ * should be a '1' right now), so physical shifting is unneeded,
+ * but temporarily change that element to 0. Before exiting the
+ * routine, we must restore the element to '1' */
array_b--;
len_b++;
+ array_b[0] = 0;
}
}
@@ -7764,6 +7778,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b
}
}
+ /* If we've changed b, restore it */
+ if (complement_b) {
+ array_b[0] = 1;
+ }
+
/* We may be removing a reference to one of the inputs */
if (a == *output || b == *output) {
assert(! invlist_is_iterating(*output));
@@ -7775,7 +7794,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const b
}
void
-Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i)
+Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i)
{
/* Take the intersection of two inversion lists and point <i> to it. *i
* SHOULD BE DEFINED upon input, and if it points to one of the two lists,
@@ -7792,8 +7811,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* union above
*/
- const UV* array_a; /* a's array */
- const UV* array_b;
+ UV* array_a; /* a's array */
+ UV* array_b;
UV len_a; /* length of a's array */
UV len_b;
@@ -7858,17 +7877,23 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
if (complement_b) {
/* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later */
+ * do this, we just pretend the array starts one later, and clear the
+ * flag as we don't have to do anything else later */
if (array_b[0] == 0) {
array_b++;
len_b--;
+ complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we pretend the list starts
- * at the 0 that is always stored immediately before the array. */
+ /* But if the first element is not zero, we unshift a 0 before the
+ * array. The data structure reserves a space for that 0 (which
+ * should be a '1' right now), so physical shifting is unneeded,
+ * but temporarily change that element to 0. Before exiting the
+ * routine, we must restore the element to '1' */
array_b--;
len_b++;
+ array_b[0] = 0;
}
}
@@ -7975,6 +8000,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
}
}
+ /* If we've changed b, restore it */
+ if (complement_b) {
+ array_b[0] = 1;
+ }
+
/* We may be removing a reference to one of the inputs */
if (a == *i || b == *i) {
assert(! invlist_is_iterating(*i));
@@ -8313,14 +8343,14 @@ Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header)
#if 0
bool
-S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
+S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b)
{
/* Return a boolean as to if the two passed in inversion lists are
* identical. The final argument, if TRUE, says to take the complement of
* the second inversion list before doing the comparison */
- const UV* array_a = invlist_array(a);
- const UV* array_b = invlist_array(b);
+ UV* array_a = invlist_array(a);
+ UV* array_b = invlist_array(b);
UV len_a = _invlist_len(a);
UV len_b = _invlist_len(b);
@@ -8342,15 +8372,20 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
/* Otherwise, to complement, we invert. Here, the first element is
* 0, just remove it. To do this, we just pretend the array starts
- * one later */
+ * one later, and clear the flag as we don't have to do anything
+ * else later */
array_b++;
len_b--;
+ complement_b = FALSE;
}
else {
- /* But if the first element is not zero, we pretend the list starts
- * at the 0 that is always stored immediately before the array. */
+ /* But if the first element is not zero, we unshift a 0 before the
+ * array. The data structure reserves a space for that 0 (which
+ * should be a '1' right now), so physical shifting is unneeded,
+ * but temporarily change that element to 0. Before exiting the
+ * routine, we must restore the element to '1' */
array_b--;
len_b++;
array_b[0] = 0;
@@ -8370,6 +8405,9 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
}
}
+ if (complement_b) {
+ array_b[0] = 1;
+ }
return retval;
}
#endif
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 27c080233c..67b6e417fc 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -15,7 +15,7 @@ require 'regen/regen_lib.pl';
# in the headers is used to minimize the possibility of things getting
# out-of-sync, or the wrong data structure being passed. Currently that
# random number is:
-my $VERSION_DATA_STRUCTURE_TYPE = 1039476070;
+my $VERSION_DATA_STRUCTURE_TYPE = 290655244;
my $out_fh = open_new('charclass_invlists.h', '>',
{style => '*', by => $0,
@@ -36,18 +36,27 @@ sub output_invlist ($$) {
# Output the inversion list $invlist using the name $name for it.
# It is output in the exact internal form for inversion lists.
- # Is the last element of the header 0, or 1 ?
- my $zero_or_one = 0;
- my $count = @$invlist;
- if ($invlist->[0] != 0) {
- unshift @$invlist, 0;
+ my $zero_or_one; # Is the last element of the header 0, or 1 ?
+
+ # If the first element is 0, it goes in the header, instead of the body
+ if ($invlist->[0] == 0) {
+ shift @$invlist;
+
+ $zero_or_one = 0;
+
+ # Add a dummy 0 at the end so that the length is constant. inversion
+ # lists are always stored with enough room so that if they change from
+ # beginning with 0, they don't have to grow.
+ push @$invlist, 0;
+ }
+ else {
$zero_or_one = 1;
}
print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name};
print $out_fh "\nstatic UV ${name}_invlist[] = {\n";
- print $out_fh "\t$count,\t/* Number of elements */\n";
+ print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
# This should be UV_MAX, but I (khw) am not confident that the suffixes
# for specifying the constant are portable, e.g. 'ull' on a 32 bit
@@ -56,8 +65,8 @@ sub output_invlist ($$) {
print $out_fh "\t0,\t/* Cache of previous search index result */\n";
print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n";
print $out_fh "\t", $zero_or_one,
- ",\t/* 0 if the list starts at 0;",
- "\n\t\t 1 if it starts at the element beyond 0 */\n";
+ ",\t/* 0 if this is the first element of the list proper;",
+ "\n\t\t 1 if the next element is the first */\n";
# The main body are the UVs passed in to this routine. Do the final
# element separately