diff options
author | David Mitchell <davem@iabyn.com> | 2017-02-14 17:10:34 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2017-02-14 17:49:58 +0000 |
commit | 77584140f7cbfe714083cacfa671085466e98a7b (patch) | |
tree | 0b046ae3941353d4a9d1a4d600f873d247406723 | |
parent | bb414e1295cbc3c4c2a55aaf82d832d6c8bf76ec (diff) | |
download | perl-77584140f7cbfe714083cacfa671085466e98a7b.tar.gz |
S_regmatch: eliminate WHILEM_A_min paren saving
In something like
"a1b2c3d4..." =~ /(?:(\w)(\d))*..../
A WHILEM state is pushed for each iteration of the '*'. Part of this
state saving includes the previous indices for each of the captures within
the body of the thing being iterated over. So we save the following sets of
values for $1,$2:
()()
(a)(1)
(b)(2)
(c)(3)
(d)(4)
Then if at any point we backtrack, we can undo one or more iterations and
restore the older values of $1,$2.
However, when the match is non-greedy, as in A*?B, then on failure of B
and backtracking we attempt *more* A's rather than removing some already
matched A's. So there's never any need to save all the current paren state
for each iteration.
This eliminates a lot of per-iteration overhead for minimal WHILEMs and
makes the following run about 25% faster:
$s = ("a" x 1000);
$s =~ /^(?:(.)(.))*?[XY]/ for 1..10_000;
-rw-r--r-- | regexec.c | 7 | ||||
-rw-r--r-- | t/perf/benchmarks | 11 |
2 files changed, 13 insertions, 5 deletions
@@ -7603,11 +7603,11 @@ NULL CACHEsayNO; NOT_REACHED; /* NOTREACHED */ - case WHILEM_A_min_fail: /* just failed to match A in a minimal match */ - /* FALLTHROUGH */ case WHILEM_A_pre_fail: /* just failed to match even minimal A */ REGCP_UNWIND(ST.lastcp); regcppop(rex, &maxopenparen); + /* FALLTHROUGH */ + case WHILEM_A_min_fail: /* just failed to match A in a minimal match */ cur_curlyx->u.curlyx.lastloc = ST.save_lastloc; cur_curlyx->u.curlyx.count--; CACHEsayNO; @@ -7661,9 +7661,6 @@ NULL ); /* Try grabbing another A and see if it helps. */ cur_curlyx->u.curlyx.lastloc = locinput; - ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, - maxopenparen); - REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(WHILEM_A_min, /*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS, locinput); diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 233f1fbdc5..dec29bf503 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1377,4 +1377,15 @@ setup => '$_ = ("0" x 100) . ("a" x 100);', code => '/[acgt]+/', }, + + 'regex::whilem::min_captures_fail' => { + desc => '/WHILEM with anon-greedy match and captures that fails', + setup => '$_ = ("a" x 20)', + code => '/^(?:(.)(.))*?[XY]/', + }, + 'regex::whilem::max_captures_fail' => { + desc => '/WHILEM with a greedy match and captures that fails', + setup => '$_ = ("a" x 20)', + code => '/^(?:(.)(.))*[XY]/', + }, ]; |