diff options
author | Karl Williamson <khw@cpan.org> | 2018-11-18 15:46:07 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-11-26 22:24:33 -0700 |
commit | daced5aea4695da4ec9e629bb3153a865d480075 (patch) | |
tree | c3633bb76fcdbbfa56a9feae21317944481bb5aa | |
parent | 5b864157a512a368408bbffe50b594a639cba56a (diff) | |
download | perl-daced5aea4695da4ec9e629bb3153a865d480075.tar.gz |
regexec.c: Use ANYOF bitmap lookup in more cases
ANYOFish nodes have a bitmap. If we know the value is in the bitmap
range, then flags that apply only to out-of-range values are irrelevant.
Any other flag being set indicates that the desired answer is more
complicated than just a bitmap lookup. So exclude the irrelevant
ANYOF_MATCHES_ALL_ABOVE_BITMAP flag from that calculation when we know
the value is in the bitmap range.
There are other flags that could be excluded, but not without
further conditionals or unsharing code, and they are either rarely set
or apply to node types whose performance we don't worry so much about
optimizing, like those for /l and /d. The changes introduced by this
commit are determined at .c compile time except for a runtime mask, and
hence don't introduce new branches that may disrupt the instruction
pipeline.
-rw-r--r-- | regexec.c | 9 |
1 files changed, 6 insertions, 3 deletions
@@ -2221,7 +2221,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
                       reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
         }
-        else if (ANYOF_FLAGS(c)) {
+        else if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+            /* We know that s is in the bitmap range since the target isn't
+             * UTF-8, so what happens for out-of-range values is not relevant,
+             * so exclude that from the flags */
             REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
         }
         else {
@@ -6701,7 +6704,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (NEXTCHR_IS_EOS)
                 sayNO;
             if (  (! utf8_target || UTF8_IS_INVARIANT(*locinput))
-                && ! ANYOF_FLAGS(scan))
+                && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
             {
                 if (! ANYOF_BITMAP_TEST(scan, * (U8 *) (locinput))) {
                     sayNO;
@@ -9363,7 +9366,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                 hardcount++;
             }
         }
-        else if (ANYOF_FLAGS(p)) {
+        else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
             while (scan < loceol
                     && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
                 scan++;