diff options
-rw-r--r-- | charclass_invlists.h | 238 | ||||
-rw-r--r-- | embed.fnc | 10 | ||||
-rw-r--r-- | inline_invlist.c | 21 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | regcomp.c | 124 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 32 |
6 files changed, 213 insertions, 216 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index b5d71afb16..57fbe47af1 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -13,9 +13,10 @@ static UV Latin1_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 0, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 0, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 256, 0 }; @@ -28,9 +29,10 @@ static UV AboveLatin1_invlist[] = { 1, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 256 }; @@ -42,9 +44,10 @@ static UV ASCII_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 0, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 0, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 128, 0 }; @@ -57,9 +60,10 @@ static UV L1Cased_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 65, 91, 97, @@ -86,9 +90,10 @@ 
static UV VertSpace_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 10, 14, 133, @@ -105,9 +110,10 @@ static UV PerlSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 14, 32, @@ -122,9 +128,10 @@ static UV XPerlSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 14, 32, @@ -157,9 +164,10 @@ static UV PosixAlnum_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -176,9 +184,10 @@ static UV L1PosixAlnum_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index 
result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -207,9 +216,10 @@ static UV PosixAlpha_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 65, 91, 97, @@ -224,9 +234,10 @@ static UV L1PosixAlpha_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 65, 91, 97, @@ -253,9 +264,10 @@ static UV PosixBlank_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 10, 32, @@ -270,9 +282,10 @@ static UV XPosixBlank_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next 
element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 10, 32, @@ -301,9 +314,10 @@ static UV PosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 0, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 0, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 32, 127, 128, @@ -318,9 +332,10 @@ static UV XPosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 0, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 0, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 32, 127, 160, @@ -335,9 +350,10 @@ static UV PosixDigit_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58 }; @@ -350,9 +366,10 @@ static UV PosixGraph_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the 
element beyond 0 */ + 0, 33, 127 }; @@ -365,9 +382,10 @@ static UV L1PosixGraph_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 33, 127, 161, @@ -382,9 +400,10 @@ static UV PosixLower_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 97, 123 }; @@ -397,9 +416,10 @@ static UV L1PosixLower_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 97, 123, 170, @@ -422,9 +442,10 @@ static UV PosixPrint_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 32, 127 }; @@ -437,9 +458,10 @@ static UV L1PosixPrint_invlist[] = { 4, /* Number of elements */ 0, /* 
Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 32, 127, 160, @@ -454,9 +476,10 @@ static UV PosixPunct_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 33, 48, 58, @@ -475,9 +498,10 @@ static UV L1PosixPunct_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 33, 48, 58, @@ -508,9 +532,10 @@ static UV PosixSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 14, 32, @@ -525,9 +550,10 @@ static UV XPosixSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, 
/* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 9, 14, 32, @@ -560,9 +586,10 @@ static UV PosixUpper_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 65, 91 }; @@ -575,9 +602,10 @@ static UV L1PosixUpper_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 65, 91, 192, @@ -594,9 +622,10 @@ static UV PosixWord_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -615,9 +644,10 @@ static UV L1PosixWord_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure 
type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -648,9 +678,10 @@ static UV PosixXDigit_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -667,9 +698,10 @@ static UV XPosixXDigit_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 48, 58, 65, @@ -690,9 +722,10 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = { 44, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 700, 701, 776, @@ -745,9 +778,10 @@ static UV _Perl_Multi_Char_Folds_invlist[] = { 58, /* Number of elements */ 0, /* Current iteration position */ 0, /* Cache of previous search index result */ - 290655244, /* Version and data structure type */ - 1, /* 0 if this is the first element of the list proper; - 1 if the next element is the first */ + 1039476070, /* Version and data structure type */ + 1, /* 0 if the list starts at 0; + 1 if it starts at the element beyond 0 */ + 0, 223, 224, 
304, @@ -1089,7 +1089,7 @@ Ap |SV* |regclass_swash |NULLOK const regexp *prog \ |NULLOK SV **listsvp|NULLOK SV **altsvp #ifdef PERL_IN_REGCOMP_C EMsR |SV* |_new_invlist_C_array|NN UV* list -: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|bool complement_b +: Not used currently: EXMs |bool |_invlistEQ |NN SV* const a|NN SV* const b|const bool complement_b #endif Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ @@ -1452,9 +1452,13 @@ EiMR |UV |invlist_highest|NN SV* const invlist #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i -EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i +EXpM |void |_invlist_intersection_maybe_complement_2nd \ + |NULLOK SV* const a|NN SV* const b \ + |const bool complement_b|NN SV** i EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output -EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output +EXpM |void |_invlist_union_maybe_complement_2nd \ + |NULLOK SV* const a|NN SV* const b \ + |const bool complement_b|NN SV** output EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result EXpM |void |_invlist_invert|NN SV* const invlist EXpM |void |_invlist_invert_prop|NN SV* const invlist diff --git a/inline_invlist.c b/inline_invlist.c index b56ce60002..b194c0d264 100644 --- a/inline_invlist.c +++ b/inline_invlist.c @@ -20,20 +20,21 @@ * insert that at this location. Then, if an auxiliary program doesn't change * correspondingly, it will be discovered immediately */ #define INVLIST_VERSION_ID_OFFSET 3 -#define INVLIST_VERSION_ID 290655244 +#define INVLIST_VERSION_ID 1039476070 + +#define INVLIST_ZERO_OFFSET 4 /* 0 or 1 */ +/* The UV at position ZERO contains either 0 or 1. 
If 0, the inversion list + * contains the code point U+0000, and begins at element [0] in the array, + * which always contains 0. If 1, the inversion list doesn't contain U+0000, + * and it begins at element [1]. Inverting an inversion list consists of + * adding or removing the 0 at the beginning of it. By reserving a space for + * that 0, inversion can be made very fast: we just flip this UV */ /* For safety, when adding new elements, remember to #undef them at the end of * the inversion list code section */ -#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */ -/* The UV at position ZERO contains either 0 or 1. If 0, the inversion list - * contains the code point U+00000, and begins here. If 1, the inversion list - * doesn't contain U+0000, and it begins at the next UV in the array. - * Inverting an inversion list consists of adding or removing the 0 at the - * beginning of it. By reserving a space for that 0, inversion can be made - * very fast */ - -#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 1) +#define HEADER_LENGTH (INVLIST_ZERO_OFFSET + 2) /* includes 1 for the constant + 0 element */ /* An element is in an inversion list iff its index is even numbered: 0, 2, 4, * etc */ @@ -6862,7 +6862,7 @@ PERL_CALLCONV SV* Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); */ -PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i) +PERL_CALLCONV void Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_4); #define PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND \ @@ -6893,7 +6893,7 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); */ -PERL_CALLCONV void 
Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) +PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** output) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_4); #define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \ @@ -7047,10 +7047,10 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) * list.) * Taking the complement (inverting) an inversion list is quite simple, if the * first element is 0, remove it; otherwise add a 0 element at the beginning. - * This implementation reserves an element at the beginning of each inversion - * list to contain 0 when the list contains 0, and contains 1 otherwise. The - * actual beginning of the list is either that element if 0, or the next one if - * 1. + * This implementation reserves an element (considered to be the final element + * of the header) at the beginning of each inversion list to always contain 0; + * there is an additional flag in the header which indicates if the list begins + * at the 0, or is offset to begin at the next element. * * More about inversion lists can be found in "Unicode Demystified" * Chapter 13 by Richard Gillam, published by Addison-Wesley. @@ -7075,11 +7075,11 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0) { /* Returns a pointer to the first element in the inversion list's array. * This is called upon initialization of an inversion list. Where the - * array begins depends on whether the list has the code point U+0000 - * in it or not. The other parameter tells it whether the code that - * follows this call is about to put a 0 in the inversion list or not. - * The first element is either the element with 0, if 0, or the next one, - * if 1 */ + * array begins depends on whether the list has the code point U+0000 in it + * or not. 
The other parameter tells it whether the code that follows this + * call is about to put a 0 in the inversion list or not. The first + * element is either the final part of the header reserved for 0, if TRUE, + * or the first element of the non-heading part, if FALSE */ UV* zero = get_invlist_zero_addr(invlist); @@ -7090,7 +7090,8 @@ S__invlist_array_init(pTHX_ SV* const invlist, const bool will_have_0) /* 1^1 = 0; 1^0 = 1 */ *zero = 1 ^ will_have_0; - return zero + *zero; + *(zero + 1) = 0; + return 1 + zero + *zero; } PERL_STATIC_INLINE UV* @@ -7108,10 +7109,12 @@ S_invlist_array(pTHX_ SV* const invlist) assert(*get_invlist_zero_addr(invlist) == 0 || *get_invlist_zero_addr(invlist) == 1); - /* The array begins either at the element reserved for zero if the - * list contains 0 (that element will be set to 0), or otherwise the next - * element (in which case the reserved element will be set to 1). */ - return (UV *) (get_invlist_zero_addr(invlist) + /* The array begins either at the header element reserved for zero or the + * element after that. The reserved element is 1 past the zero_addr + * element; the latter contains 0 or 1 to indicate how much additionally to + * add */ + assert(0 == *(1 + get_invlist_zero_addr(invlist))); + return (UV *) (1 + get_invlist_zero_addr(invlist) + *get_invlist_zero_addr(invlist)); } @@ -7127,19 +7130,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len) assert(len <= SvLEN(invlist)); SvCUR_set(invlist, TO_INTERNAL_SIZE(len)); - /* If the list contains U+0000, that element is part of the header, - * and should not be counted as part of the array. It will contain - * 0 in that case, and 1 otherwise. So we could flop 0=>1, 1=>0 and - * subtract: - * SvCUR_set(invlist, - * TO_INTERNAL_SIZE(len - * - (*get_invlist_zero_addr(inv_list) ^ 1))); - * But, this is only valid if len is not 0. 
The consequences of not doing - * this is that the memory allocation code may think that 1 more UV is - * being used than actually is, and so might do an unnecessary grow. That - * seems worth not bothering to make this the precise amount. - * - * Note that when inverting, SvCUR shouldn't change */ + /* Note that when inverting, SvCUR shouldn't change */ } PERL_STATIC_INLINE IV* @@ -7191,10 +7182,8 @@ S_invlist_max(pTHX_ SV* const invlist) PERL_STATIC_INLINE UV* S_get_invlist_zero_addr(pTHX_ SV* invlist) { - /* Return the address of the UV that is reserved to hold 0 if the inversion - * list contains 0. This has to be the last element of the heading, as the - * list proper starts with either it if 0, or the next element if not. - * (But we force it to contain either 0 or 1) */ + /* Return the address of the UV that says whether the inversion list is + * offset (it contains 1) or not (contains 0) */ PERL_ARGS_ASSERT_GET_INVLIST_ZERO_ADDR; @@ -7211,6 +7200,7 @@ Perl__new_invlist(pTHX_ IV initial_size) * system default is used instead */ SV* new_list; + UV* zero_addr; if (initial_size < 0) { initial_size = INVLIST_INITIAL_LEN; @@ -7225,11 +7215,13 @@ Perl__new_invlist(pTHX_ IV initial_size) /* This should force a segfault if a method doesn't initialize this * properly */ - *get_invlist_zero_addr(new_list) = UV_MAX; + zero_addr = get_invlist_zero_addr(new_list); + *zero_addr = UV_MAX; + *(zero_addr + 1) = 0; *get_invlist_previous_index_addr(new_list) = 0; *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID; -#if HEADER_LENGTH != 5 +#if HEADER_LENGTH != 6 # error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length #endif @@ -7554,7 +7546,7 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV } void -Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) +Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const 
a, SV* const b, const bool complement_b, SV** output) { /* Take the union of two inversion lists and point <output> to it. *output * SHOULD BE DEFINED upon input, and if it points to one of the two lists, @@ -7576,8 +7568,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co * return the larger of the input lists, but then outside code might need * to keep track of whether to free the input list or not */ - UV* array_a; /* a's array */ - UV* array_b; + const UV* array_a; /* a's array */ + const UV* array_b; UV len_a; /* length of a's array */ UV len_b; @@ -7645,23 +7637,17 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co if (complement_b) { /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later, and clear the - * flag as we don't have to do anything else later */ + * do this, we just pretend the array starts one later */ if (array_b[0] == 0) { array_b++; len_b--; - complement_b = FALSE; } else { - /* But if the first element is not zero, we unshift a 0 before the - * array. The data structure reserves a space for that 0 (which - * should be a '1' right now), so physical shifting is unneeded, - * but temporarily change that element to 0. Before exiting the - * routine, we must restore the element to '1' */ + /* But if the first element is not zero, we pretend the list starts + * at the 0 that is always stored immediately before the array. */ array_b--; len_b++; - array_b[0] = 0; } } @@ -7778,11 +7764,6 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co } } - /* If we've changed b, restore it */ - if (complement_b) { - array_b[0] = 1; - } - /* We may be removing a reference to one of the inputs */ if (a == *output || b == *output) { assert(! 
invlist_is_iterating(*output)); @@ -7794,7 +7775,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool co } void -Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** i) +Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, const bool complement_b, SV** i) { /* Take the intersection of two inversion lists and point <i> to it. *i * SHOULD BE DEFINED upon input, and if it points to one of the two lists, @@ -7811,8 +7792,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * union above */ - UV* array_a; /* a's array */ - UV* array_b; + const UV* array_a; /* a's array */ + const UV* array_b; UV len_a; /* length of a's array */ UV len_b; @@ -7877,23 +7858,17 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, if (complement_b) { /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later, and clear the - * flag as we don't have to do anything else later */ + * do this, we just pretend the array starts one later */ if (array_b[0] == 0) { array_b++; len_b--; - complement_b = FALSE; } else { - /* But if the first element is not zero, we unshift a 0 before the - * array. The data structure reserves a space for that 0 (which - * should be a '1' right now), so physical shifting is unneeded, - * but temporarily change that element to 0. Before exiting the - * routine, we must restore the element to '1' */ + /* But if the first element is not zero, we pretend the list starts + * at the 0 that is always stored immediately before the array. 
*/ array_b--; len_b++; - array_b[0] = 0; } } @@ -8000,11 +7975,6 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, } } - /* If we've changed b, restore it */ - if (complement_b) { - array_b[0] = 1; - } - /* We may be removing a reference to one of the inputs */ if (a == *i || b == *i) { assert(! invlist_is_iterating(*i)); @@ -8343,14 +8313,14 @@ Perl__invlist_dump(pTHX_ SV* const invlist, const char * const header) #if 0 bool -S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b) +S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) { /* Return a boolean as to if the two passed in inversion lists are * identical. The final argument, if TRUE, says to take the complement of * the second inversion list before doing the comparison */ - UV* array_a = invlist_array(a); - UV* array_b = invlist_array(b); + const UV* array_a = invlist_array(a); + const UV* array_b = invlist_array(b); UV len_a = _invlist_len(a); UV len_b = _invlist_len(b); @@ -8372,20 +8342,15 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b) /* Otherwise, to complement, we invert. Here, the first element is * 0, just remove it. To do this, we just pretend the array starts - * one later, and clear the flag as we don't have to do anything - * else later */ + * one later */ array_b++; len_b--; - complement_b = FALSE; } else { - /* But if the first element is not zero, we unshift a 0 before the - * array. The data structure reserves a space for that 0 (which - * should be a '1' right now), so physical shifting is unneeded, - * but temporarily change that element to 0. Before exiting the - * routine, we must restore the element to '1' */ + /* But if the first element is not zero, we pretend the list starts + * at the 0 that is always stored immediately before the array. 
*/ array_b--; len_b++; array_b[0] = 0; @@ -8405,9 +8370,6 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool complement_b) } } - if (complement_b) { - array_b[0] = 1; - } return retval; } #endif diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 67b6e417fc..8e131d6f58 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -15,7 +15,7 @@ require 'regen/regen_lib.pl'; # in the headers is used to minimize the possibility of things getting # out-of-sync, or the wrong data structure being passed. Currently that # random number is: -my $VERSION_DATA_STRUCTURE_TYPE = 290655244; +my $VERSION_DATA_STRUCTURE_TYPE = 1039476070; my $out_fh = open_new('charclass_invlists.h', '>', {style => '*', by => $0, @@ -36,27 +36,23 @@ sub output_invlist ($$) { # Output the inversion list $invlist using the name $name for it. # It is output in the exact internal form for inversion lists. - my $zero_or_one; # Is the last element of the header 0, or 1 ? - - # If the first element is 0, it goes in the header, instead of the body - if ($invlist->[0] == 0) { - shift @$invlist; - - $zero_or_one = 0; - - # Add a dummy 0 at the end so that the length is constant. inversion - # lists are always stored with enough room so that if they change from - # beginning with 0, they don't have to grow. - push @$invlist, 0; - } - else { + # Is the last element of the header 0, or 1 ? + my $zero_or_one = 0; + my $count = @$invlist; + if ($invlist->[0] != 0) { + unshift @$invlist, 0; + $zero_or_one = 1; } + else { # Temporary until we disentangle the header from the body. In the + # meantime, adding this 0 at the end of lists that begin with 0 + # will keep the code from reading beyond the end of the array. 
+ push @$invlist, 0; + } print $out_fh "\n#ifndef PERL_IN_XSUB_RE\n" unless exists $include_in_ext_re{$name}; print $out_fh "\nstatic UV ${name}_invlist[] = {\n"; - print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n"; + print $out_fh "\t$count,\t/* Number of elements */\n"; # This should be UV_MAX, but I (khw) am not confident that the suffixes # for specifying the constant are portable, e.g. 'ull' on a 32 bit @@ -65,8 +61,8 @@ sub output_invlist ($$) { print $out_fh "\t0,\t/* Cache of previous search index result */\n"; print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n"; print $out_fh "\t", $zero_or_one, - ",\t/* 0 if this is the first element of the list proper;", - "\n\t\t 1 if the next element is the first */\n"; + ",\t/* 0 if the list starts at 0;", + "\n\t\t 1 if it starts at the element beyond 0 */\n"; # The main body are the UVs passed in to this routine. Do the final # element separately |