summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2018-11-18 15:46:07 -0700
committer Karl Williamson <khw@cpan.org> 2018-11-26 22:24:33 -0700
commit daced5aea4695da4ec9e629bb3153a865d480075 (patch)
tree c3633bb76fcdbbfa56a9feae21317944481bb5aa
parent 5b864157a512a368408bbffe50b594a639cba56a (diff)
download perl-daced5aea4695da4ec9e629bb3153a865d480075.tar.gz
regexec.c: Use ANYOF bitmap lookup in more cases
ANYOFish nodes have a bitmap. If we know the value is in the bitmap range, then flags that apply only to out-of-range values are irrelevant. Any other flag being set indicates that the desired answer is more complicated than a simple bitmap lookup. So exclude the irrelevant flag (ANYOF_MATCHES_ALL_ABOVE_BITMAP) from that calculation when we know the value is in the bitmap. There are other flags that could also be excluded, but not without adding further conditionals or unsharing code; those flags are either rarely set or apply to node types, like /l and /d, whose performance we don't worry about as much. The changes introduced by this commit are resolved at C compile time, except for a runtime mask, and hence don't introduce new branches that could disrupt the instruction pipeline.
-rw-r--r-- regexec.c 9
1 files changed, 6 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index 32cea3fe33..1756a95b7f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2221,7 +2221,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_CLASS_SCAN(1, /* 1=>is-utf8 */
reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
}
- else if (ANYOF_FLAGS(c)) {
+ else if (ANYOF_FLAGS(c) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+ /* We know that s is in the bitmap range since the target isn't
+ * UTF-8, so what happens for out-of-range values is not relevant,
+ * so exclude that from the flags */
REXEC_FBC_CLASS_SCAN(0, reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
}
else {
@@ -6701,7 +6704,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
if (NEXTCHR_IS_EOS)
sayNO;
if ( (! utf8_target || UTF8_IS_INVARIANT(*locinput))
- && ! ANYOF_FLAGS(scan))
+ && ! (ANYOF_FLAGS(scan) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP))
{
if (! ANYOF_BITMAP_TEST(scan, * (U8 *) (locinput))) {
sayNO;
@@ -9363,7 +9366,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
hardcount++;
}
}
- else if (ANYOF_FLAGS(p)) {
+ else if (ANYOF_FLAGS(p) & ~ ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
while (scan < loceol
&& reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
scan++;