diff options
-rw-r--r-- | charclass_invlists.h | 99 | ||||
-rw-r--r-- | embed.fnc | 5 | ||||
-rw-r--r-- | embed.h | 3 | ||||
-rw-r--r-- | inline_invlist.c | 8 | ||||
-rw-r--r-- | proto.h | 17 | ||||
-rw-r--r-- | regcomp.c | 82 | ||||
-rw-r--r-- | regen/mk_invlists.pl | 3 |
7 files changed, 174 insertions, 43 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index 03f2e02287..bbe2452c05 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -10,7 +10,8 @@ UV Latin1_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 0, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 256, @@ -20,7 +21,8 @@ UV Latin1_invlist[] = { UV AboveLatin1_invlist[] = { 1, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 256 @@ -29,7 +31,8 @@ UV AboveLatin1_invlist[] = { UV ASCII_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 0, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 128, @@ -39,7 +42,8 @@ UV ASCII_invlist[] = { UV L1Cased_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 65, @@ -63,7 +67,8 @@ UV L1Cased_invlist[] = { UV VertSpace_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 10, @@ -77,7 +82,8 @@ UV VertSpace_invlist[] = { UV PerlSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -89,7 +95,8 @@ UV PerlSpace_invlist[] = { UV XPerlSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -119,7 +126,8 @@ UV XPerlSpace_invlist[] = { UV PosixAlnum_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -133,7 +141,8 @@ UV PosixAlnum_invlist[] = { UV L1PosixAlnum_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -159,7 +168,8 @@ UV L1PosixAlnum_invlist[] = { UV PosixAlpha_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 65, @@ -171,7 +181,8 @@ UV PosixAlpha_invlist[] = { UV L1PosixAlpha_invlist[] = { 16, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 65, @@ -195,7 +206,8 @@ UV L1PosixAlpha_invlist[] = { UV PosixBlank_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -207,7 +219,8 @@ UV PosixBlank_invlist[] = { UV XPosixBlank_invlist[] = { 18, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -233,7 +246,8 @@ UV XPosixBlank_invlist[] = { UV PosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 0, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 32, @@ -245,7 +259,8 @@ UV PosixCntrl_invlist[] = { UV XPosixCntrl_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 0, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 32, @@ -257,7 +272,8 @@ UV XPosixCntrl_invlist[] = { UV PosixDigit_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -267,7 +283,8 @@ UV PosixDigit_invlist[] = { UV PosixGraph_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 33, @@ -277,7 +294,8 @@ UV PosixGraph_invlist[] = { UV L1PosixGraph_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 33, @@ -289,7 +307,8 @@ UV L1PosixGraph_invlist[] = { UV PosixLower_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 97, @@ -299,7 +318,8 @@ UV PosixLower_invlist[] = { UV L1PosixLower_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 97, @@ -319,7 +339,8 @@ UV L1PosixLower_invlist[] = { UV PosixPrint_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 32, @@ -329,7 +350,8 @@ UV PosixPrint_invlist[] = { UV L1PosixPrint_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 32, @@ -341,7 +363,8 @@ UV L1PosixPrint_invlist[] = { UV PosixPunct_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 33, @@ -357,7 +380,8 @@ UV PosixPunct_invlist[] = { UV L1PosixPunct_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 33, @@ -385,7 +409,8 @@ UV L1PosixPunct_invlist[] = { UV PosixSpace_invlist[] = { 4, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -397,7 +422,8 @@ UV PosixSpace_invlist[] = { UV XPosixSpace_invlist[] = { 22, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 9, @@ -427,7 +453,8 @@ UV XPosixSpace_invlist[] = { UV PosixUpper_invlist[] = { 2, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 65, @@ -437,7 +464,8 @@ UV PosixUpper_invlist[] = { UV L1PosixUpper_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 65, @@ -451,7 +479,8 @@ UV L1PosixUpper_invlist[] = { UV PosixWord_invlist[] = { 8, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -467,7 +496,8 @@ UV PosixWord_invlist[] = { UV L1PosixWord_invlist[] = { 20, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -495,7 +525,8 @@ UV L1PosixWord_invlist[] = { UV PosixXDigit_invlist[] = { 6, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -509,7 +540,8 @@ UV PosixXDigit_invlist[] = { UV XPosixXDigit_invlist[] = { 12, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 48, @@ -529,7 +561,8 @@ UV XPosixXDigit_invlist[] = { UV NonL1_Perl_Non_Final_Folds_invlist[] = { 44, /* Number of elements */ 0, /* Current iteration position */ - 1064334010, /* Version and data structure type */ + 0, /* Cache of previous search index result */ + 290655244, /* Version and data structure type */ 1, /* 0 if this is the first element of the list proper; 1 if the next element is the first */ 700, @@ -1391,7 +1391,10 @@ EiMR |UV* |invlist_array |NN SV* const invlist EsM |void |invlist_extend |NN SV* const invlist|const UV len EiMR |UV* |get_invlist_zero_addr |NN SV* invlist EiMR |UV |invlist_max |NN SV* const invlist -EiM |void |invlist_set_len |NN SV* const invlist|const UV len +EiM |void |invlist_set_len|NN SV* const invlist|const UV len +EiMR |IV* |get_invlist_previous_index_addr|NN SV* invlist +EiMR |IV |invlist_previous_index|NN SV* const invlist +EiM |void |invlist_set_previous_index|NN SV* const invlist|const IV index EiM |void |invlist_trim |NN SV* const invlist EiMR |SV* |invlist_clone |NN SV* const invlist EiMR |UV* |get_invlist_iter_addr |NN SV* invlist @@ -914,6 +914,7 @@ #define cl_or S_cl_or #define compute_EXACTish(a) S_compute_EXACTish(aTHX_ a) #define get_invlist_iter_addr(a) S_get_invlist_iter_addr(aTHX_ a) +#define get_invlist_previous_index_addr(a) S_get_invlist_previous_index_addr(aTHX_ a) #define get_invlist_version_id_addr(a) S_get_invlist_version_id_addr(aTHX_ a) #define get_invlist_zero_addr(a) S_get_invlist_zero_addr(aTHX_ a) #define grok_bslash_N(a,b,c,d,e,f) S_grok_bslash_N(aTHX_ a,b,c,d,e,f) @@ -924,7 +925,9 @@ #define invlist_iterinit(a) S_invlist_iterinit(aTHX_ a) #define invlist_iternext(a,b,c) S_invlist_iternext(aTHX_ a,b,c) #define invlist_max(a) S_invlist_max(aTHX_ a) +#define invlist_previous_index(a) S_invlist_previous_index(aTHX_ a) #define invlist_set_len(a,b) S_invlist_set_len(aTHX_ a,b) +#define invlist_set_previous_index(a,b) S_invlist_set_previous_index(aTHX_ a,b) #define invlist_trim(a) S_invlist_trim(aTHX_ a) #define join_exact(a,b,c,d,e,f,g) S_join_exact(aTHX_ a,b,c,d,e,f,g) #define make_trie(a,b,c,d,e,f,g,h) S_make_trie(aTHX_ a,b,c,d,e,f,g,h) diff --git a/inline_invlist.c b/inline_invlist.c index 80a6898e42..bb5ec17a9b 100644 --- a/inline_invlist.c +++ b/inline_invlist.c @@ -15,6 +15,8 @@ #define INVLIST_LEN_OFFSET 0 /* Number of elements in the inversion list */ #define INVLIST_ITER_OFFSET 1 /* Current iteration position */ +#define INVLIST_PREVIOUS_INDEX_OFFSET 2 /* Place to cache index of previous + result */ /* This is a combination of a version and data structure type, so that one * being passed in can be validated to be an inversion list of the correct @@ -22,13 +24,13 @@ * in the range 2**31-1 should be generated and the new() method changed to * insert that at this location. Then, if an auxiliary program doesn't change * correspondingly, it will be discovered immediately */ -#define INVLIST_VERSION_ID_OFFSET 2 -#define INVLIST_VERSION_ID 1064334010 +#define INVLIST_VERSION_ID_OFFSET 3 +#define INVLIST_VERSION_ID 290655244 /* For safety, when adding new elements, remember to #undef them at the end of * the inversion list code section */ -#define INVLIST_ZERO_OFFSET 3 /* 0 or 1; must be last element in header */ +#define INVLIST_ZERO_OFFSET 4 /* 0 or 1; must be last element in header */ /* The UV at position ZERO contains either 0 or 1. If 0, the inversion list * contains the code point U+00000, and begins here. If 1, the inversion list * doesn't contain U+0000, and it begins at the next UV in the array. @@ -6442,6 +6442,12 @@ PERL_STATIC_INLINE UV* S_get_invlist_iter_addr(pTHX_ SV* invlist) #define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR \ assert(invlist) +PERL_STATIC_INLINE IV* S_get_invlist_previous_index_addr(pTHX_ SV* invlist) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR \ + assert(invlist) + PERL_STATIC_INLINE UV* S_get_invlist_version_id_addr(pTHX_ SV* invlist) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); @@ -6502,11 +6508,22 @@ PERL_STATIC_INLINE UV S_invlist_max(pTHX_ SV* const invlist) #define PERL_ARGS_ASSERT_INVLIST_MAX \ assert(invlist) +PERL_STATIC_INLINE IV S_invlist_previous_index(pTHX_ SV* const invlist) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX \ + assert(invlist) + PERL_STATIC_INLINE void S_invlist_set_len(pTHX_ SV* const invlist, const UV len) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_INVLIST_SET_LEN \ assert(invlist) +PERL_STATIC_INLINE void S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX \ + assert(invlist) + PERL_STATIC_INLINE void S_invlist_trim(pTHX_ SV* const invlist) __attribute__nonnull__(pTHX_1); #define PERL_ARGS_ASSERT_INVLIST_TRIM \ @@ -7076,6 +7076,39 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len) * Note that when inverting, SvCUR shouldn't change */ } +PERL_STATIC_INLINE IV* +S_get_invlist_previous_index_addr(pTHX_ SV* invlist) +{ + /* Return the address of the UV that is reserved to hold the cached index + * */ + + PERL_ARGS_ASSERT_GET_INVLIST_PREVIOUS_INDEX_ADDR; + + return (IV *) (SvPVX(invlist) + (INVLIST_PREVIOUS_INDEX_OFFSET * sizeof (UV))); +} + +PERL_STATIC_INLINE IV +S_invlist_previous_index(pTHX_ SV* const invlist) +{ + /* Returns cached index of previous search */ + + PERL_ARGS_ASSERT_INVLIST_PREVIOUS_INDEX; + + return *get_invlist_previous_index_addr(invlist); +} + +PERL_STATIC_INLINE void +S_invlist_set_previous_index(pTHX_ SV* const invlist, const IV index) +{ + /* Caches <index> for later retrieval */ + + PERL_ARGS_ASSERT_INVLIST_SET_PREVIOUS_INDEX; + + assert(index == 0 || index < (int) _invlist_len(invlist)); + + *get_invlist_previous_index_addr(invlist) = index; +} + PERL_STATIC_INLINE UV S_invlist_max(pTHX_ SV* const invlist) { @@ -7126,8 +7159,9 @@ Perl__new_invlist(pTHX_ IV initial_size) * properly */ *get_invlist_zero_addr(new_list) = UV_MAX; + *get_invlist_previous_index_addr(new_list) = 0; *get_invlist_version_id_addr(new_list) = INVLIST_VERSION_ID; -#if HEADER_LENGTH != 4 +#if HEADER_LENGTH != 5 # error Need to regenerate VERSION_ID by running perl -E 'say int(rand 2**31-1)', and then changing the #if to the new length #endif @@ -7272,6 +7306,7 @@ Perl__invlist_search(pTHX_ SV* const invlist, const UV cp) * contains <cp> */ IV low = 0; + IV mid; IV high = _invlist_len(invlist); const IV highest_element = high - 1; const UV* array; @@ -7287,8 +7322,42 @@ Perl__invlist_search(pTHX_ SV* const invlist, const UV cp) * can't combine this with the test above, because we can't get the array * unless we know the list is non-empty) */ array = invlist_array(invlist); - if (cp < array[0]) { - return -1; + + mid = invlist_previous_index(invlist); + assert(mid >=0 && mid <= highest_element); + + /* <mid> contains the cache of the result of the previous call to this + * function (0 the first time). See if this call is for the same result, + * or if it is for mid-1. This is under the theory that calls to this + * function will often be for related code points that are near each other. + * And benchmarks show that caching gives better results. We also test + * here if the code point is within the bounds of the list. These tests + * replace others that would have had to be made anyway to make sure that + * the array bounds were not exceeded, and give us extra information at the + * same time */ + if (cp >= array[mid]) { + if (cp >= array[highest_element]) { + return highest_element; + } + + /* Here, array[mid] <= cp < array[highest_element]. This means that + * the final element is not the answer, so can exclude it; it also + * means that <mid> is not the final element, so can refer to 'mid + 1' + * safely */ + if (cp < array[mid + 1]) { + return mid; + } + high--; + low = mid + 1; + } + else { /* cp < aray[mid] */ + if (cp < array[0]) { /* Fail if outside the array */ + return -1; + } + high = mid; + if (cp >= array[mid - 1]) { + goto found_entry; + } } /* Binary search. What we are looking for is <i> such that @@ -7296,7 +7365,7 @@ Perl__invlist_search(pTHX_ SV* const invlist, const UV cp) * The loop below converges on the i+1. Note that there may not be an * (i+1)th element in the array, and things work nonetheless */ while (low < high) { - IV mid = (low + high) / 2; + mid = (low + high) / 2; assert(mid <= highest_element); if (array[mid] <= cp) { /* cp >= array[mid] */ low = mid + 1; @@ -7312,7 +7381,10 @@ Perl__invlist_search(pTHX_ SV* const invlist, const UV cp) } } - return high - 1; + found_entry: + high--; + invlist_set_previous_index(invlist, high); + return high; } void diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index 9cdbd4ca8e..b1e472319a 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -15,7 +15,7 @@ require 'regen/regen_lib.pl'; # in the headers is used to minimize the possibility of things getting # out-of-sync, or the wrong data structure being passed. Currently that # random number is: -my $VERSION_DATA_STRUCTURE_TYPE = 1064334010; +my $VERSION_DATA_STRUCTURE_TYPE = 290655244; my $out_fh = open_new('charclass_invlists.h', '>', {style => '*', by => $0, @@ -55,6 +55,7 @@ sub output_invlist ($$) { print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n"; print $out_fh "\t0,\t/* Current iteration position */\n"; + print $out_fh "\t0,\t/* Cache of previous search index result */\n"; print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n"; print $out_fh "\t", $zero_or_one, ",\t/* 0 if this is the first element of the list proper;", |