summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author Yves Orton <demerphq@gmail.com> 2022-03-13 07:19:22 +0100
committer Yves Orton <demerphq@gmail.com> 2022-04-08 12:29:21 +0800
commit 02ae92405527bc342059f65d0665647b7d5a7ec9 (patch)
tree f28eccf347788cf37a8a108c4ec03b30850d6572 /regexec.c
parent 7ed70d6557055a15563eb2369e156b6970a6498b (diff)
download perl-02ae92405527bc342059f65d0665647b7d5a7ec9.tar.gz
regexec.c: ACCEPT inside of a (...)+ should stop looping (CURLYM optimization)
GH Issue #19484 reported that the statement print "ABDE" =~ /(A (A|B(*ACCEPT)|C)+ D)(E)/x ? "yes: <$1-$2>" : "no"; does not output the expected 'AB-B' and instead fails to match, whereas the same pattern without the + quantifier behaves as expected. This patch is 3 of 4 and fixes part of the problem: the CURLYM optimization was not terminating its loop properly when the loop body contained an ACCEPT. The patch adds a new variable, 'is_accepted', which is used to ensure that the CURLYM optimization stops looping once an ACCEPT regop has been executed.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 18
1 files changed, 14 insertions, 4 deletions
diff --git a/regexec.c b/regexec.c
index 9b510c9d70..81bf1e711a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6363,6 +6363,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
I32 orig_savestack_ix = PL_savestack_ix;
U8 * script_run_begin = NULL;
char *match_end= NULL; /* where a match MUST end to be considered successful */
+ bool is_accepted = FALSE; /* have we hit an ACCEPT opcode? */
/* Solaris Studio 12.3 messes up fetching PL_charclass['\n'] */
#if (defined(__SUNPRO_C) && (__SUNPRO_C == 0x5120) && defined(__x86_64) && defined(USE_64_BIT_ALL))
@@ -8393,6 +8394,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case ACCEPT: /* (*ACCEPT) */
+ is_accepted = true;
if (scan->flags)
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
if (ARG2L(scan)){
@@ -8959,7 +8961,8 @@ NULL
if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
goto fake_end;
- {
+
+ if (!is_accepted) {
I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
if ( max == REG_INFTY || ST.count < max )
goto curlym_do_A; /* try to match another A */
@@ -8975,6 +8978,9 @@ NULL
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
+ if (is_accepted)
+ goto curlym_close_B;
+
if (ST.Binfo.count < 0) {
/* calculate possible match of 1st char following curly */
assert(ST.B);
@@ -9016,26 +9022,29 @@ NULL
}
}
+ curlym_close_B:
if (ST.me->flags) {
/* emulate CLOSE: mark current A as captured */
U32 paren = (U32)ST.me->flags;
- if (ST.count) {
+ if (ST.count || is_accepted) {
CLOSE_CAPTURE(paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
else
rex->offs[paren].end = -1;
-
if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
{
- if (ST.count)
+ if (ST.count || is_accepted)
goto fake_end;
else
sayNO;
}
}
+ if (is_accepted)
+ goto fake_end;
+
PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol, /* match B */
script_run_begin);
NOT_REACHED; /* NOTREACHED */
@@ -9369,6 +9378,7 @@ NULL
fake_end:
if (cur_eval) {
/* we've just finished A in /(??{A})B/; now continue with B */
+ is_accepted= false;
SET_RECURSE_LOCINPUT("FAKE-END[before]", CUR_EVAL.prev_recurse_locinput);
st->u.eval.prev_rex = rex_sv; /* inner */