summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2017-02-14 17:10:34 +0000
committerDavid Mitchell <davem@iabyn.com>2017-02-14 17:49:58 +0000
commit77584140f7cbfe714083cacfa671085466e98a7b (patch)
tree0b046ae3941353d4a9d1a4d600f873d247406723
parentbb414e1295cbc3c4c2a55aaf82d832d6c8bf76ec (diff)
downloadperl-77584140f7cbfe714083cacfa671085466e98a7b.tar.gz
S_regmatch: eliminate WHILEM_A_min paren saving
In something like "a1b2c3d4..." =~ /(?:(\w)(\d))*..../ A WHILEM state is pushed for each iteration of the '*'. Part of this state saving includes the previous indices for each of the captures within the body of the thing being iterated over. So we save the following sets of values for $1,$2: ()() (a)(1) (b)(2) (c)(3) (d)(4) Then if at any point we backtrack, we can undo one or more iterations and restore the older values of $1,$2. However, when the match is non-greedy, as in A*?B, then on failure of B and backtracking we attempt *more* A's rather than removing some already matched A's. So there's never any need to save all the current paren state for each iteration. This eliminates a lot of per-iteration overhead for minimal WHILEMs and makes the following run about 25% faster: $s = ("a" x 1000); $s =~ /^(?:(.)(.))*?[XY]/ for 1..10_000;
-rw-r--r--regexec.c7
-rw-r--r--t/perf/benchmarks11
2 files changed, 13 insertions, 5 deletions
diff --git a/regexec.c b/regexec.c
index 73178b33d1..bd7887086f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -7603,11 +7603,11 @@ NULL
CACHEsayNO;
NOT_REACHED; /* NOTREACHED */
- case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
- /* FALLTHROUGH */
case WHILEM_A_pre_fail: /* just failed to match even minimal A */
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
+ /* FALLTHROUGH */
+ case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
cur_curlyx->u.curlyx.count--;
CACHEsayNO;
@@ -7661,9 +7661,6 @@ NULL
);
/* Try grabbing another A and see if it helps. */
cur_curlyx->u.curlyx.lastloc = locinput;
- ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
- maxopenparen);
- REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(WHILEM_A_min,
/*A*/ NEXTOPER(ST.save_curlyx->u.curlyx.me) + EXTRA_STEP_2ARGS,
locinput);
diff --git a/t/perf/benchmarks b/t/perf/benchmarks
index 233f1fbdc5..dec29bf503 100644
--- a/t/perf/benchmarks
+++ b/t/perf/benchmarks
@@ -1377,4 +1377,15 @@
setup => '$_ = ("0" x 100) . ("a" x 100);',
code => '/[acgt]+/',
},
+
+ 'regex::whilem::min_captures_fail' => {
+ desc => '/WHILEM with anon-greedy match and captures that fails',
+ setup => '$_ = ("a" x 20)',
+ code => '/^(?:(.)(.))*?[XY]/',
+ },
+ 'regex::whilem::max_captures_fail' => {
+ desc => '/WHILEM with a greedy match and captures that fails',
+ setup => '$_ = ("a" x 20)',
+ code => '/^(?:(.)(.))*[XY]/',
+ },
];