diff options
-rw-r--r-- | charclass_invlists.h | 136 | ||||
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | inline_invlist.c | 14 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 27 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 8 |
6 files changed, 136 insertions, 53 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 88ebe67640..690526dd6e 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -11,7 +11,9 @@ static UV Latin1_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -24,7 +26,9 @@ static UV Latin1_invlist[] = { static UV AboveLatin1_invlist[] = { 1, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -37,7 +41,9 @@ static UV AboveLatin1_invlist[] = { static UV ASCII_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -50,7 +56,9 @@ static UV ASCII_invlist[] = { static UV L1Cased_invlist[] = { 16, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -78,7 +86,9 @@ static UV L1Cased_invlist[] = { static UV VertSpace_invlist[] = { 6, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -96,7 +106,9 @@ static UV VertSpace_invlist[] = { static UV PerlSpace_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -112,7 +124,9 @@ static UV PerlSpace_invlist[] = { static UV XPerlSpace_invlist[] = { 22, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -146,7 +160,9 @@ static UV XPerlSpace_invlist[] = { static UV PosixAlnum_invlist[] = { 6, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -164,7 +180,9 @@ static UV PosixAlnum_invlist[] = { static UV L1PosixAlnum_invlist[] = { 18, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -194,7 +212,9 @@ static UV L1PosixAlnum_invlist[] = { static UV PosixAlpha_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -210,7 +230,9 @@ static UV PosixAlpha_invlist[] = { static UV L1PosixAlpha_invlist[] = { 16, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -238,7 +260,9 @@ static UV L1PosixAlpha_invlist[] = { static UV PosixBlank_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -254,7 +278,9 @@ static UV PosixBlank_invlist[] = { static UV XPosixBlank_invlist[] = { 18, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -284,7 +310,9 @@ static UV XPosixBlank_invlist[] = { static UV PosixCntrl_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -299,7 +327,9 @@ static UV PosixCntrl_invlist[] = { static UV XPosixCntrl_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 0, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -314,7 +344,9 @@ static UV XPosixCntrl_invlist[] = { static UV PosixDigit_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -328,7 +360,9 @@ static UV PosixDigit_invlist[] = { static UV PosixGraph_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -342,7 +376,9 @@ static UV PosixGraph_invlist[] = { static UV L1PosixGraph_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -358,7 +394,9 @@ static UV L1PosixGraph_invlist[] = { static UV PosixLower_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -372,7 +410,9 @@ static UV PosixLower_invlist[] = { static UV L1PosixLower_invlist[] = { 12, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -396,7 +436,9 @@ static UV L1PosixLower_invlist[] = { static UV PosixPrint_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -410,7 +452,9 @@ static UV PosixPrint_invlist[] = { static UV L1PosixPrint_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -426,7 +470,9 @@ static UV L1PosixPrint_invlist[] = { static UV PosixPunct_invlist[] = { 8, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -446,7 +492,9 @@ static UV PosixPunct_invlist[] = { static UV L1PosixPunct_invlist[] = { 20, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -478,7 +526,9 @@ static UV L1PosixPunct_invlist[] = { static UV PosixSpace_invlist[] = { 4, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -494,7 +544,9 @@ static UV PosixSpace_invlist[] = { static UV XPosixSpace_invlist[] = { 22, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -528,7 +580,9 @@ static UV XPosixSpace_invlist[] = { static UV PosixUpper_invlist[] = { 2, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -542,7 +596,9 @@ static UV PosixUpper_invlist[] = { static UV L1PosixUpper_invlist[] = { 6, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -560,7 +616,9 @@ static UV L1PosixUpper_invlist[] = { static UV PosixWord_invlist[] = { 8, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -580,7 +638,9 @@ static UV PosixWord_invlist[] = { static UV L1PosixWord_invlist[] = { 20, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -612,7 +672,9 @@ static UV L1PosixWord_invlist[] = { static UV PosixXDigit_invlist[] = { 6, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -630,7 +692,9 @@ static UV PosixXDigit_invlist[] = { static UV XPosixXDigit_invlist[] = { 12, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -652,7 +716,9 @@ static UV XPosixXDigit_invlist[] = { static UV NonL1_Perl_Non_Final_Folds_invlist[] = { 44, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -706,7 +772,9 @@ static UV NonL1_Perl_Non_Final_Folds_invlist[] = { static UV _Perl_Multi_Char_Folds_invlist[] = { 58, /* Number of elements */ - 1511554547, /* Version and data structure type */ + 0, /* Current iteration position */ + 0, /* Cache of previous search index result */ + 1039476070, /* Version and data structure type */ 1, /* 0 if the list starts at 0; 1 if it starts at the element beyond 0 */ 0, @@ -1437,7 +1437,7 @@ EiM |void |invlist_set_previous_index|NN SV* const invlist|const IV index EiM |void |invlist_trim |NN SV* const invlist EiMR |SV* |invlist_clone |NN SV* const invlist EiMR |bool |invlist_is_iterating|NN SV* const invlist -EiMR |STRLEN*|get_invlist_iter_addr |NN SV* invlist +EiMR |UV* |get_invlist_iter_addr |NN SV* invlist EiM |void |invlist_iterinit|NN SV* invlist EsMR |bool |invlist_iternext|NN SV* invlist|NN UV* start|NN UV* end EiM |void |invlist_iterfinish|NN SV* invlist diff --git a/inline_invlist.c b/inline_invlist.c index 5d239bd6cd..f20f6daae0 100644 --- a/inline_invlist.c +++ b/inline_invlist.c @@ -9,16 +9,20 @@ #if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) #define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */ +#define INVLIST_ITER_OFFSET 1 /* Current iteration position */ +#define INVLIST_PREVIOUS_INDEX_OFFSET 2 /* Place to cache index of previous + result */ /* This is a combination of a version and data structure type, so that one * being passed in can be validated to be an inversion list of the correct * vintage. When the structure of the header is changed, a new random number - * in the range 2**31-1 should be generated. Then, if an auxiliary program - * doesn't change correspondingly, it will be discovered immediately */ -#define INVLIST_VERSION_ID_OFFSET 1 -#define INVLIST_VERSION_ID 1511554547 + * in the range 2**31-1 should be generated and the new() method changed to + * insert that at this location. Then, if an auxiliary program doesn't change + * correspondingly, it will be discovered immediately */ +#define INVLIST_VERSION_ID_OFFSET 3 +#define INVLIST_VERSION_ID 1039476070 -#define INVLIST_ZERO_OFFSET 2 /* 0 or 1 */ +#define INVLIST_ZERO_OFFSET 4 /* 0 or 1 */ /* The UV at position ZERO contains either 0 or 1. If 0, the inversion list * contains the code point U+00000, and begins at element [0] in the array, * which always contains 0. If 1, the inversion list doesn't contain U+0000, @@ -6480,7 +6480,7 @@ STATIC bool S_could_it_be_a_POSIX_class(pTHX_ struct RExC_state_t *pRExC_state) #define PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS \ assert(pRExC_state) -PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(pTHX_ SV* invlist) +PERL_STATIC_INLINE UV* S_get_invlist_iter_addr(pTHX_ SV* invlist) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR \ @@ -7144,7 +7144,7 @@ S_get_invlist_previous_index_addr(pTHX_ SV* invlist) PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR; - return &(((XPVLV*) SvANY(invlist))->xiv_u.xivu_iv); + return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV))); } PERL_STATIC_INLINE IV @@ -7216,7 +7216,7 @@ Perl__new_invlist(pTHX_ IV initial_size) invlist_set_len(new_list, 0); /* Force iterinit() to be used to get iteration to work */ - *get_invlist_iter_addr(new_list) = (STRLEN) UV_MAX; + *get_invlist_iter_addr(new_list) = UV_MAX; /* This should force a segfault if a method doesn't initialize this * properly */ @@ -7225,7 +7225,7 @@ Perl__new_invlist(pTHX_ IV initial_size) *(zero_addr + 1) = 0; *get_invlist_previous_index_addr(new_list) = 0; -#if HEADER_LENGTH != 4 +#if HEADER_LENGTH != 6 # error Need to regenerate INVLIST_VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length #endif @@ -7254,9 +7254,12 @@ S__new_invlist_C_array(pTHX_ UV* list) Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list"); } invlist_set_len(invlist, list[INVLIST_LEN_OFFSET]); - invlist_set_previous_index(invlist, 0); - /* Initialize the iteration pointer. */ + /* Initialize the iteration pointer. + * XXX This could be done at compile time in charclass_invlists.h, but I + * (khw) am not confident that the suffixes for specifying the C constant + * UV_MAX are portable, e.g. 'ull' on a 32 bit machine that is configured + * to use 64 bits; might need a Configure probe */ invlist_iterfinish(invlist); return invlist; @@ -8128,7 +8131,7 @@ S_invlist_clone(pTHX_ SV* const invlist) return new_invlist; } -PERL_STATIC_INLINE STRLEN* +PERL_STATIC_INLINE UV* S_get_invlist_iter_addr(pTHX_ SV* invlist) { /* Return the address of the UV that contains the current iteration @@ -8136,7 +8139,7 @@ S_get_invlist_iter_addr(pTHX_ SV* invlist) PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR; - return &(LvTARGOFF(invlist)); + return (UV *) (SvPVX(invlist) + (INVLIST_ITER_OFFSET * sizeof (UV))); } PERL_STATIC_INLINE void @@ -8160,7 +8163,7 @@ S_invlist_iterfinish(pTHX_ SV* invlist) PERL_ARGS_ASSERT_INVLIST_ITERFINISH; - *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX; + *get_invlist_iter_addr(invlist) = UV_MAX; } STATIC bool @@ -8173,14 +8176,14 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end) * <*start> and <*end> are unchanged, and the next call to this function * will start over at the beginning of the list */ - STRLEN* pos = get_invlist_iter_addr(invlist); + UV* pos = get_invlist_iter_addr(invlist); UV len = _invlist_len(invlist); UV *array; PERL_ARGS_ASSERT_INVLIST_ITERNEXT; if (*pos >= len) { - *pos = (STRLEN) UV_MAX; /* Force iterinit() to be required next time */ + *pos = UV_MAX; /* Force iterinit() to be required next time */ return FALSE; } @@ -8203,7 +8206,7 @@ S_invlist_is_iterating(pTHX_ SV* const invlist) { PERL_ARGS_ASSERT_INVLIST_IS_ITERATING; - return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX; + return *(get_invlist_iter_addr(invlist)) < UV_MAX; } PERL_STATIC_INLINE UV @@ -8373,7 +8376,9 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b) #undef FROM_INTERNAL_SIZE #undef INVLIST_LEN_OFFSET #undef INVLIST_ZERO_OFFSET +#undef INVLIST_ITER_OFFSET #undef INVLIST_VERSION_ID +#undef INVLIST_PREVIOUS_INDEX_OFFSET /* End of inversion list object */ diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index f1e1bd9843..27c080233c 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -15,7 +15,7 @@ require 'regen/regen_lib.pl'; # in the headers is used to minimize the possibility of things getting # out-of-sync, or the wrong data structure being passed. Currently that # random number is: -my $VERSION_DATA_STRUCTURE_TYPE = 1511554547; +my $VERSION_DATA_STRUCTURE_TYPE = 1039476070; my $out_fh = open_new('charclass_invlists.h', '>', {style => '*', by => $0, @@ -48,6 +48,12 @@ sub output_invlist ($$) { print $out_fh "\nstatic UV ${name}_invlist[] = {\n"; print $out_fh "\t$count,\t/* Number of elements */\n"; + + # This should be UV_MAX, but I (khw) am not confident that the suffixes + # for specifying the constant are portable, e.g. 'ull' on a 32 bit + # machine that is configured to use 64 bits; might need a Configure probe + print $out_fh "\t0,\t/* Current iteration position */\n"; + print $out_fh "\t0,\t/* Cache of previous search index result */\n"; print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n"; print $out_fh "\t", $zero_or_one, ",\t/* 0 if the list starts at 0;", |