summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2017-02-14 15:59:57 +0000
committerDavid Mitchell <davem@iabyn.com>2017-02-14 17:49:58 +0000
commit4ee16520199a0e11bf4dbdbac71f0a64e5510271 (patch)
tree3798512c4f271dc8c418b10a336811ed377c101a
parentf4197b8e1d1f984eb6635ea41ce14394432b96b1 (diff)
downloadperl-4ee16520199a0e11bf4dbdbac71f0a64e5510271.tar.gz
clear savestack on (?{...}) failure and backtrack
RT #126697 In a regex, after executing a (?{...}) code block, if we fail and backtrack over the codeblock, we're supposed to unwind the savestack, so that for any example any local()s within the code block are undone. It turns out that a backtracking state isn't pushed for (?{...}), only for postponed evals ( i.e. (??{...})). This means that it relies on one of the earlier backtracking states to clear the savestack on its behalf. This can't always be relied upon, and the ticket above contains code where this falls down; in particular: 'ABC' =~ m{ \A (?: (?: AB | A | BC ) (?{ local $count = $count + 1; print "! count=$count; ; pos=${\pos}\n"; }) )* \z }x Here we end up relying on TRIE_next to do the cleaning up, but TRIE_next doesn't, since there's nothing it would be responsible for that needs cleaning up. The solution to this is to push a backtrack state for every (?{...}) as well as every (??{...}). The sole job of that state is to do a LEAVE_SCOPE(ST.lastcp). The existing backtrack state EVAL_AB has been renamed EVAL_postponed_AB to make it clear it's only used on postponed /(??{A})B/ regexes, and a new state has been added, EVAL_B, which is only called when backtracking after failing something in the B in /(?{...})B/.
-rw-r--r--regcomp.sym2
-rw-r--r--regexec.c25
-rw-r--r--regnodes.h164
-rw-r--r--t/re/pat_re_eval.t22
4 files changed, 122 insertions, 91 deletions
diff --git a/regcomp.sym b/regcomp.sym
index ac67955270..999d965565 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -243,7 +243,7 @@ PSEUDO PSEUDO, off ; Pseudo opcode for internal use.
#
#
TRIE next:FAIL
-EVAL AB:FAIL
+EVAL B,postponed_AB:FAIL
CURLYX end:FAIL
WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
BRANCH next:FAIL
diff --git a/regexec.c b/regexec.c
index bf9809a9c7..e38365516e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -5391,7 +5391,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
U8 gimme = G_SCALAR;
CV *caller_cv = NULL; /* who called us */
CV *last_pushed_cv = NULL; /* most recently called (?{}) CV */
- CHECKPOINT runops_cp; /* savestack position before executing EVAL */
U32 maxopenparen = 0; /* max '(' index seen so far */
int to_complement; /* Invert the result? */
_char_class_number classnum;
@@ -6787,7 +6786,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
goto eval_recurse_doit;
/* NOTREACHED */
- case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
+ case EVAL: /* /(?{...})B/ /(??{A})B/ and /(?(?{...})X|Y)B/ */
if (cur_eval && cur_eval->locinput==locinput) {
if ( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
@@ -6806,7 +6805,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* save *all* paren positions */
regcppush(rex, 0, maxopenparen);
- REGCP_SET(runops_cp);
+ REGCP_SET(ST.lastcp);
if (!caller_cv)
caller_cv = find_runcv(NULL);
@@ -7008,12 +7007,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
* in the regexp code uses the pad ! */
PL_op = oop;
PL_curcop = ocurcop;
- regcp_restore(rex, runops_cp, &maxopenparen);
+ regcp_restore(rex, ST.lastcp, &maxopenparen);
PL_curpm_under = PL_curpm;
PL_curpm = PL_reg_curpm;
- if (logical != 2)
- break;
+ if (logical != 2) {
+ PUSH_STATE_GOTO(EVAL_B, next, locinput);
+ /* NOTREACHED */
+ }
}
/* only /(??{})/ from now on */
@@ -7111,11 +7112,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
ST.prev_eval = cur_eval;
cur_eval = st;
/* now continue from first node in postoned RE */
- PUSH_YES_STATE_GOTO(EVAL_AB, startpoint, locinput);
+ PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput);
NOT_REACHED; /* NOTREACHED */
}
- case EVAL_AB: /* cleanup after a successful (??{A})B */
+ case EVAL_postponed_AB: /* cleanup after a successful (??{A})B */
/* note: this is called twice; first after popping B, then A */
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_ "EVAL_AB cur_eval=%p prev_eval=%p\n",
@@ -7161,7 +7162,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
sayYES;
- case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
+ case EVAL_B_fail: /* unsuccessful B in (?{...})B */
+ REGCP_UNWIND(ST.lastcp);
+ sayNO;
+
+ case EVAL_postponed_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
/* note: this is called twice; first after popping B, then A */
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_ "EVAL_AB_fail cur_eval=%p prev_eval=%p\n",
@@ -8263,7 +8268,7 @@ NULL
SET_RECURSE_LOCINPUT("FAKE-END[after]", cur_eval->locinput);
- PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B,
+ PUSH_YES_STATE_GOTO(EVAL_postponed_AB, st->u.eval.prev_eval->u.eval.B,
locinput); /* match B */
}
diff --git a/regnodes.h b/regnodes.h
index f820c5684e..8fe0f41d1b 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -7,7 +7,7 @@
/* Regops and State definitions */
#define REGNODE_MAX 92
-#define REGMATCH_STATE_MAX 132
+#define REGMATCH_STATE_MAX 134
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -107,44 +107,46 @@
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
-#define EVAL_AB (REGNODE_MAX + 3) /* state for EVAL */
-#define EVAL_AB_fail (REGNODE_MAX + 4) /* state for EVAL */
-#define CURLYX_end (REGNODE_MAX + 5) /* state for CURLYX */
-#define CURLYX_end_fail (REGNODE_MAX + 6) /* state for CURLYX */
-#define WHILEM_A_pre (REGNODE_MAX + 7) /* state for WHILEM */
-#define WHILEM_A_pre_fail (REGNODE_MAX + 8) /* state for WHILEM */
-#define WHILEM_A_min (REGNODE_MAX + 9) /* state for WHILEM */
-#define WHILEM_A_min_fail (REGNODE_MAX + 10) /* state for WHILEM */
-#define WHILEM_A_max (REGNODE_MAX + 11) /* state for WHILEM */
-#define WHILEM_A_max_fail (REGNODE_MAX + 12) /* state for WHILEM */
-#define WHILEM_B_min (REGNODE_MAX + 13) /* state for WHILEM */
-#define WHILEM_B_min_fail (REGNODE_MAX + 14) /* state for WHILEM */
-#define WHILEM_B_max (REGNODE_MAX + 15) /* state for WHILEM */
-#define WHILEM_B_max_fail (REGNODE_MAX + 16) /* state for WHILEM */
-#define BRANCH_next (REGNODE_MAX + 17) /* state for BRANCH */
-#define BRANCH_next_fail (REGNODE_MAX + 18) /* state for BRANCH */
-#define CURLYM_A (REGNODE_MAX + 19) /* state for CURLYM */
-#define CURLYM_A_fail (REGNODE_MAX + 20) /* state for CURLYM */
-#define CURLYM_B (REGNODE_MAX + 21) /* state for CURLYM */
-#define CURLYM_B_fail (REGNODE_MAX + 22) /* state for CURLYM */
-#define IFMATCH_A (REGNODE_MAX + 23) /* state for IFMATCH */
-#define IFMATCH_A_fail (REGNODE_MAX + 24) /* state for IFMATCH */
-#define CURLY_B_min_known (REGNODE_MAX + 25) /* state for CURLY */
-#define CURLY_B_min_known_fail (REGNODE_MAX + 26) /* state for CURLY */
-#define CURLY_B_min (REGNODE_MAX + 27) /* state for CURLY */
-#define CURLY_B_min_fail (REGNODE_MAX + 28) /* state for CURLY */
-#define CURLY_B_max (REGNODE_MAX + 29) /* state for CURLY */
-#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
-#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
-#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
-#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
-#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
-#define SKIP_next (REGNODE_MAX + 35) /* state for SKIP */
-#define SKIP_next_fail (REGNODE_MAX + 36) /* state for SKIP */
-#define CUTGROUP_next (REGNODE_MAX + 37) /* state for CUTGROUP */
-#define CUTGROUP_next_fail (REGNODE_MAX + 38) /* state for CUTGROUP */
-#define KEEPS_next (REGNODE_MAX + 39) /* state for KEEPS */
-#define KEEPS_next_fail (REGNODE_MAX + 40) /* state for KEEPS */
+#define EVAL_B (REGNODE_MAX + 3) /* state for EVAL */
+#define EVAL_B_fail (REGNODE_MAX + 4) /* state for EVAL */
+#define EVAL_postponed_AB (REGNODE_MAX + 5) /* state for EVAL */
+#define EVAL_postponed_AB_fail (REGNODE_MAX + 6) /* state for EVAL */
+#define CURLYX_end (REGNODE_MAX + 7) /* state for CURLYX */
+#define CURLYX_end_fail (REGNODE_MAX + 8) /* state for CURLYX */
+#define WHILEM_A_pre (REGNODE_MAX + 9) /* state for WHILEM */
+#define WHILEM_A_pre_fail (REGNODE_MAX + 10) /* state for WHILEM */
+#define WHILEM_A_min (REGNODE_MAX + 11) /* state for WHILEM */
+#define WHILEM_A_min_fail (REGNODE_MAX + 12) /* state for WHILEM */
+#define WHILEM_A_max (REGNODE_MAX + 13) /* state for WHILEM */
+#define WHILEM_A_max_fail (REGNODE_MAX + 14) /* state for WHILEM */
+#define WHILEM_B_min (REGNODE_MAX + 15) /* state for WHILEM */
+#define WHILEM_B_min_fail (REGNODE_MAX + 16) /* state for WHILEM */
+#define WHILEM_B_max (REGNODE_MAX + 17) /* state for WHILEM */
+#define WHILEM_B_max_fail (REGNODE_MAX + 18) /* state for WHILEM */
+#define BRANCH_next (REGNODE_MAX + 19) /* state for BRANCH */
+#define BRANCH_next_fail (REGNODE_MAX + 20) /* state for BRANCH */
+#define CURLYM_A (REGNODE_MAX + 21) /* state for CURLYM */
+#define CURLYM_A_fail (REGNODE_MAX + 22) /* state for CURLYM */
+#define CURLYM_B (REGNODE_MAX + 23) /* state for CURLYM */
+#define CURLYM_B_fail (REGNODE_MAX + 24) /* state for CURLYM */
+#define IFMATCH_A (REGNODE_MAX + 25) /* state for IFMATCH */
+#define IFMATCH_A_fail (REGNODE_MAX + 26) /* state for IFMATCH */
+#define CURLY_B_min_known (REGNODE_MAX + 27) /* state for CURLY */
+#define CURLY_B_min_known_fail (REGNODE_MAX + 28) /* state for CURLY */
+#define CURLY_B_min (REGNODE_MAX + 29) /* state for CURLY */
+#define CURLY_B_min_fail (REGNODE_MAX + 30) /* state for CURLY */
+#define CURLY_B_max (REGNODE_MAX + 31) /* state for CURLY */
+#define CURLY_B_max_fail (REGNODE_MAX + 32) /* state for CURLY */
+#define COMMIT_next (REGNODE_MAX + 33) /* state for COMMIT */
+#define COMMIT_next_fail (REGNODE_MAX + 34) /* state for COMMIT */
+#define MARKPOINT_next (REGNODE_MAX + 35) /* state for MARKPOINT */
+#define MARKPOINT_next_fail (REGNODE_MAX + 36) /* state for MARKPOINT */
+#define SKIP_next (REGNODE_MAX + 37) /* state for SKIP */
+#define SKIP_next_fail (REGNODE_MAX + 38) /* state for SKIP */
+#define CUTGROUP_next (REGNODE_MAX + 39) /* state for CUTGROUP */
+#define CUTGROUP_next_fail (REGNODE_MAX + 40) /* state for CUTGROUP */
+#define KEEPS_next (REGNODE_MAX + 41) /* state for KEEPS */
+#define KEEPS_next_fail (REGNODE_MAX + 42) /* state for KEEPS */
/* PL_regkind[] What type of regop or state is this. */
@@ -248,8 +250,10 @@ EXTCONST U8 PL_regkind[] = {
/* ------------ States ------------- */
TRIE, /* TRIE_next */
TRIE, /* TRIE_next_fail */
- EVAL, /* EVAL_AB */
- EVAL, /* EVAL_AB_fail */
+ EVAL, /* EVAL_B */
+ EVAL, /* EVAL_B_fail */
+ EVAL, /* EVAL_postponed_AB */
+ EVAL, /* EVAL_postponed_AB_fail */
CURLYX, /* CURLYX_end */
CURLYX, /* CURLYX_end_fail */
WHILEM, /* WHILEM_A_pre */
@@ -592,44 +596,46 @@ EXTCONST char * const PL_reg_name[] = {
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
- "EVAL_AB", /* REGNODE_MAX +0x03 */
- "EVAL_AB_fail", /* REGNODE_MAX +0x04 */
- "CURLYX_end", /* REGNODE_MAX +0x05 */
- "CURLYX_end_fail", /* REGNODE_MAX +0x06 */
- "WHILEM_A_pre", /* REGNODE_MAX +0x07 */
- "WHILEM_A_pre_fail", /* REGNODE_MAX +0x08 */
- "WHILEM_A_min", /* REGNODE_MAX +0x09 */
- "WHILEM_A_min_fail", /* REGNODE_MAX +0x0a */
- "WHILEM_A_max", /* REGNODE_MAX +0x0b */
- "WHILEM_A_max_fail", /* REGNODE_MAX +0x0c */
- "WHILEM_B_min", /* REGNODE_MAX +0x0d */
- "WHILEM_B_min_fail", /* REGNODE_MAX +0x0e */
- "WHILEM_B_max", /* REGNODE_MAX +0x0f */
- "WHILEM_B_max_fail", /* REGNODE_MAX +0x10 */
- "BRANCH_next", /* REGNODE_MAX +0x11 */
- "BRANCH_next_fail", /* REGNODE_MAX +0x12 */
- "CURLYM_A", /* REGNODE_MAX +0x13 */
- "CURLYM_A_fail", /* REGNODE_MAX +0x14 */
- "CURLYM_B", /* REGNODE_MAX +0x15 */
- "CURLYM_B_fail", /* REGNODE_MAX +0x16 */
- "IFMATCH_A", /* REGNODE_MAX +0x17 */
- "IFMATCH_A_fail", /* REGNODE_MAX +0x18 */
- "CURLY_B_min_known", /* REGNODE_MAX +0x19 */
- "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1a */
- "CURLY_B_min", /* REGNODE_MAX +0x1b */
- "CURLY_B_min_fail", /* REGNODE_MAX +0x1c */
- "CURLY_B_max", /* REGNODE_MAX +0x1d */
- "CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
- "COMMIT_next", /* REGNODE_MAX +0x1f */
- "COMMIT_next_fail", /* REGNODE_MAX +0x20 */
- "MARKPOINT_next", /* REGNODE_MAX +0x21 */
- "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
- "SKIP_next", /* REGNODE_MAX +0x23 */
- "SKIP_next_fail", /* REGNODE_MAX +0x24 */
- "CUTGROUP_next", /* REGNODE_MAX +0x25 */
- "CUTGROUP_next_fail", /* REGNODE_MAX +0x26 */
- "KEEPS_next", /* REGNODE_MAX +0x27 */
- "KEEPS_next_fail", /* REGNODE_MAX +0x28 */
+ "EVAL_B", /* REGNODE_MAX +0x03 */
+ "EVAL_B_fail", /* REGNODE_MAX +0x04 */
+ "EVAL_postponed_AB", /* REGNODE_MAX +0x05 */
+ "EVAL_postponed_AB_fail", /* REGNODE_MAX +0x06 */
+ "CURLYX_end", /* REGNODE_MAX +0x07 */
+ "CURLYX_end_fail", /* REGNODE_MAX +0x08 */
+ "WHILEM_A_pre", /* REGNODE_MAX +0x09 */
+ "WHILEM_A_pre_fail", /* REGNODE_MAX +0x0a */
+ "WHILEM_A_min", /* REGNODE_MAX +0x0b */
+ "WHILEM_A_min_fail", /* REGNODE_MAX +0x0c */
+ "WHILEM_A_max", /* REGNODE_MAX +0x0d */
+ "WHILEM_A_max_fail", /* REGNODE_MAX +0x0e */
+ "WHILEM_B_min", /* REGNODE_MAX +0x0f */
+ "WHILEM_B_min_fail", /* REGNODE_MAX +0x10 */
+ "WHILEM_B_max", /* REGNODE_MAX +0x11 */
+ "WHILEM_B_max_fail", /* REGNODE_MAX +0x12 */
+ "BRANCH_next", /* REGNODE_MAX +0x13 */
+ "BRANCH_next_fail", /* REGNODE_MAX +0x14 */
+ "CURLYM_A", /* REGNODE_MAX +0x15 */
+ "CURLYM_A_fail", /* REGNODE_MAX +0x16 */
+ "CURLYM_B", /* REGNODE_MAX +0x17 */
+ "CURLYM_B_fail", /* REGNODE_MAX +0x18 */
+ "IFMATCH_A", /* REGNODE_MAX +0x19 */
+ "IFMATCH_A_fail", /* REGNODE_MAX +0x1a */
+ "CURLY_B_min_known", /* REGNODE_MAX +0x1b */
+ "CURLY_B_min_known_fail", /* REGNODE_MAX +0x1c */
+ "CURLY_B_min", /* REGNODE_MAX +0x1d */
+ "CURLY_B_min_fail", /* REGNODE_MAX +0x1e */
+ "CURLY_B_max", /* REGNODE_MAX +0x1f */
+ "CURLY_B_max_fail", /* REGNODE_MAX +0x20 */
+ "COMMIT_next", /* REGNODE_MAX +0x21 */
+ "COMMIT_next_fail", /* REGNODE_MAX +0x22 */
+ "MARKPOINT_next", /* REGNODE_MAX +0x23 */
+ "MARKPOINT_next_fail", /* REGNODE_MAX +0x24 */
+ "SKIP_next", /* REGNODE_MAX +0x25 */
+ "SKIP_next_fail", /* REGNODE_MAX +0x26 */
+ "CUTGROUP_next", /* REGNODE_MAX +0x27 */
+ "CUTGROUP_next_fail", /* REGNODE_MAX +0x28 */
+ "KEEPS_next", /* REGNODE_MAX +0x29 */
+ "KEEPS_next_fail", /* REGNODE_MAX +0x2a */
};
#endif /* DOINIT */
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index f23a79c51b..6921d38bac 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -22,7 +22,7 @@ BEGIN {
}
-plan tests => 532; # Update this when adding/deleting tests.
+plan tests => 533; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1271,6 +1271,26 @@ sub run_tests {
}->();
+ # RT #126697
+ # savestack wasn't always being unwound on EVAL failure
+ {
+ local our $i = 0;
+ my $max = 0;
+
+ 'ABC' =~ m{
+ \A
+ (?:
+ (?: AB | A | BC )
+ (?{
+ local $i = $i + 1;
+ $max = $i if $max < $i;
+ })
+ )*
+ \z
+ }x;
+ is $max, 2, "RT #126697";
+ }
+
} # End of sub run_tests