summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yves Orton <demerphq@gmail.com> 2023-01-09 22:34:13 +0100
committer Yves Orton <demerphq@gmail.com> 2023-03-13 21:26:08 +0800
commit acababb42be12ff2986b73c1bfa963b70bb5d54e (patch)
tree dc8cc4980e6fe3de0c686cc641dbbe37d1e8e961
parent 05b13cf680588a26de64f13d2b3be385e17624bc (diff)
download perl-acababb42be12ff2986b73c1bfa963b70bb5d54e.tar.gz
regexec.c - teach BRANCH and BRANCHJ nodes to reset capture buffers
In /((a)(b)|(a))+/ we should not end up with $2 and $4 being set at the same time. When a branch fails, it should reset any capture buffers that might have been touched by that branch. We change BRANCH and BRANCHJ to store the number of parens before the branch, and the number of parens after the branch was completed. When a BRANCH operation fails, we clear the buffers it contains before we continue on. It is a bit more complex than it should be because we have both BRANCHJ and BRANCH. (One of these days we should merge them together.) This is also made somewhat more complex because TRIE nodes are actually branches, and may also need to track capture buffers, at two levels: for the overall TRIE op, and, especially for jump tries, for each jump target, where we emulate the behavior of branches. So we have to apply the same clearing logic when a trie branch fails as well.
-rw-r--r-- pod/perldebguts.pod 4
-rw-r--r-- regcomp.c 46
-rw-r--r-- regcomp.h 36
-rw-r--r-- regcomp.sym 4
-rw-r--r-- regcomp_debug.c 12
-rw-r--r-- regcomp_trie.c 28
-rw-r--r-- regexec.c 55
-rw-r--r-- regexp.h 8
-rw-r--r-- regnodes.h 6
-rw-r--r-- t/re/pat.t 24
-rw-r--r-- t/re/re_tests 11
11 files changed, 199 insertions, 35 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 42db53944f..6db2cd1571 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -668,7 +668,7 @@ will be lost.
# pointer of each individual branch points; each branch
# starts with the operand node of a BRANCH node.
#
- BRANCH node Match this alternative, or the next...
+ BRANCH node 1 Match this alternative, or the next...
# Literals
@@ -796,7 +796,7 @@ will be lost.
# Support for long RE
LONGJMP off 1 1 Jump far away.
- BRANCHJ off 1 1 BRANCH with long offset.
+ BRANCHJ off 2L 1 BRANCH with long offset.
# Special Case Regops
IFMATCH off 1 1 Succeeds if the following matches; non-zero
diff --git a/regcomp.c b/regcomp.c
index ef3d2557f9..61ec8c211f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4084,6 +4084,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
parse_rest:
/* Pick up the branches, linking them together. */
segment_parse_start = RExC_parse;
+ I32 npar_before_regbranch = RExC_npar - 1;
br = regbranch(pRExC_state, &flags, 1, depth+1);
/* branch_len = (paren != 0); */
@@ -4095,9 +4096,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (*RExC_parse == '|') {
if (RExC_use_BRANCHJ) {
reginsert(pRExC_state, BRANCHJ, br, depth+1);
+ ARG2La_SET(REGNODE_p(br), npar_before_regbranch);
+ ARG2Lb_SET(REGNODE_p(br), (U16)RExC_npar - 1);
}
else {
reginsert(pRExC_state, BRANCH, br, depth+1);
+ ARGa_SET(REGNODE_p(br), (U16)npar_before_regbranch);
+ ARGb_SET(REGNODE_p(br), (U16)RExC_npar - 1);
}
have_branch = 1;
}
@@ -4140,6 +4145,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */
REQUIRE_BRANCHJ(flagp, 0);
}
+ assert(OP(REGNODE_p(br)) == BRANCH || OP(REGNODE_p(br))==BRANCHJ);
+ assert(OP(REGNODE_p(lastbr)) == BRANCH || OP(REGNODE_p(lastbr))==BRANCHJ);
+ if (OP(REGNODE_p(br)) == BRANCH) {
+ if (OP(REGNODE_p(lastbr)) == BRANCH)
+ ARGb_SET(REGNODE_p(lastbr),ARGa(REGNODE_p(br)));
+ else
+ ARG2Lb_SET(REGNODE_p(lastbr),ARGa(REGNODE_p(br)));
+ }
+ else
+ if (OP(REGNODE_p(br)) == BRANCHJ) {
+ if (OP(REGNODE_p(lastbr)) == BRANCH)
+ ARGb_SET(REGNODE_p(lastbr),ARG2La(REGNODE_p(br)));
+ else
+ ARG2Lb_SET(REGNODE_p(lastbr),ARG2La(REGNODE_p(br)));
+ }
+
lastbr = br;
*flagp |= flags & (HASWIDTH | POSTPONED);
}
@@ -4213,6 +4234,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
(IV)(ender - lastbr)
);
});
+ if (OP(REGNODE_p(lastbr)) == BRANCH) {
+ ARGb_SET(REGNODE_p(lastbr),(U16)RExC_npar-1);
+ }
+ else
+ if (OP(REGNODE_p(lastbr)) == BRANCHJ) {
+ ARG2Lb_SET(REGNODE_p(lastbr),(U16)RExC_npar-1);
+ }
+
if (! REGTAIL(pRExC_state, lastbr, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
@@ -4356,6 +4385,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
regnode_offset ret;
regnode_offset chain = 0;
regnode_offset latest;
+ regnode *branch_node = NULL;
I32 flags = 0, c = 0;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
@@ -4366,10 +4396,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
if (first)
ret = 0;
else {
- if (RExC_use_BRANCHJ)
- ret = reganode(pRExC_state, BRANCHJ, 0);
- else {
- ret = reg_node(pRExC_state, BRANCH);
+ if (RExC_use_BRANCHJ) {
+ ret = reg2Lanode(pRExC_state, BRANCHJ, 0, 0);
+ branch_node = REGNODE_p(ret);
+ ARG2La_SET(branch_node, (U16)RExC_npar-1);
+ } else {
+ ret = reganode(pRExC_state, BRANCH, 0);
+ branch_node = REGNODE_p(ret);
+ ARGa_SET(branch_node, (U16)RExC_npar-1);
}
}
@@ -13351,6 +13385,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
PerlMemShared_free(trie->bitmap);
if (trie->jump)
PerlMemShared_free(trie->jump);
+ if (trie->j_before_paren)
+ PerlMemShared_free(trie->j_before_paren);
+ if (trie->j_after_paren)
+ PerlMemShared_free(trie->j_after_paren);
PerlMemShared_free(trie->wordinfo);
/* do this last!!!! */
PerlMemShared_free(ri->data->data[n]);
diff --git a/regcomp.h b/regcomp.h
index 9a1b00dece..4a4ed52219 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -183,7 +183,13 @@ struct regnode_1 {
U8 flags;
U8 type;
U16 next_off;
- U32 arg1;
+ union {
+ U32 arg1;
+ struct {
+ U16 arg1a;
+ U16 arg1b;
+ };
+ };
};
/* Node whose argument is 'SV *'. This needs to be used very carefully in
@@ -214,7 +220,13 @@ struct regnode_2L {
U8 type;
U16 next_off;
U32 arg1;
- I32 arg2;
+ union {
+ I32 arg2;
+ struct {
+ U16 arg2a;
+ U16 arg2b;
+ };
+ };
};
/* 'Two field' -- Two 32 bit signed args.
@@ -361,18 +373,26 @@ struct regnode_ssc {
#define ARG(p) ARG_VALUE(ARG_LOC(p))
#define ARGp(p) ARGp_VALUE_inline(p)
+#define ARGa(p) ARG_VALUE(ARGa_LOC(p))
+#define ARGb(p) ARG_VALUE(ARGb_LOC(p))
#define ARG1(p) ARG_VALUE(ARG1_LOC(p))
#define ARG2(p) ARG_VALUE(ARG2_LOC(p))
#define ARG3(p) ARG_VALUE(ARG3_LOC(p))
#define ARG4(p) ARG_VALUE(ARG4_LOC(p))
#define ARG2L(p) ARG_VALUE(ARG2L_LOC(p))
+#define ARG2La(p) ARG_VALUE(ARG2La_LOC(p))
+#define ARG2Lb(p) ARG_VALUE(ARG2Lb_LOC(p))
#define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val))
+#define ARGa_SET(p, val) ARG__SET(ARGa_LOC(p), (val))
+#define ARGb_SET(p, val) ARG__SET(ARGb_LOC(p), (val))
#define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val))
#define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val))
#define ARG3_SET(p, val) ARG__SET(ARG3_LOC(p), (val))
#define ARG4_SET(p, val) ARG__SET(ARG4_LOC(p), (val))
#define ARG2L_SET(p, val) ARG__SET(ARG2L_LOC(p), (val))
+#define ARG2La_SET(p, val) ARG__SET(ARG2La_LOC(p), (val))
+#define ARG2Lb_SET(p, val) ARG__SET(ARG2Lb_LOC(p), (val))
#define ARGp_SET(p, val) ARGp_SET_inline((p),(val))
#undef NEXT_OFF
@@ -454,13 +474,16 @@ struct regnode_ssc {
#define NODE_ALIGN(node)
#define ARG_LOC(p) (((struct regnode_1 *)p)->arg1)
+#define ARGa_LOC(p) (((struct regnode_1 *)p)->arg1a)
+#define ARGb_LOC(p) (((struct regnode_1 *)p)->arg1b)
#define ARGp_BYTES_LOC(p) (((struct regnode_p *)p)->arg1_sv_ptr_bytes)
#define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1)
#define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2)
#define ARG3_LOC(p) (((struct regnode_4 *)p)->arg3)
#define ARG4_LOC(p) (((struct regnode_4 *)p)->arg4)
#define ARG2L_LOC(p) (((struct regnode_2L *)p)->arg2)
-
+#define ARG2La_LOC(p) (((struct regnode_2L *)p)->arg2a)
+#define ARG2Lb_LOC(p) (((struct regnode_2L *)p)->arg2b)
/* These should no longer be used directly in most cases. Please use
* the REGNODE_AFTER() macros instead. */
@@ -1148,6 +1171,11 @@ struct _reg_trie_data {
char *bitmap; /* stclass bitmap */
U16 *jump; /* optional 1 indexed array of offsets before tail
for the node following a given word. */
+ U16 *j_before_paren; /* optional 1 indexed array of parno reset data
+ for the given jump. */
+ U16 *j_after_paren; /* optional 1 indexed array of parno reset data
+ for the given jump. */
+
reg_trie_wordinfo *wordinfo; /* array of info per word */
U16 uniquecharcount; /* unique chars in trie (width of trans table) */
U32 startstate; /* initial state - used for common prefix optimisation */
@@ -1157,6 +1185,8 @@ struct _reg_trie_data {
U32 statecount; /* Build only - number of states in the states array
(including the unused zero state) */
U32 wordcount; /* Build only */
+ U16 before_paren;
+ U16 after_paren;
#ifdef DEBUGGING
STRLEN charcount; /* Build only */
#endif
diff --git a/regcomp.sym b/regcomp.sym
index c0735aada9..e01844f9b0 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -140,7 +140,7 @@ CLUMP CLUMP, no 0 V ; Match any extended grapheme cluster sequence
#* pointer of each individual branch points; each branch
#* starts with the operand node of a BRANCH node.
#*
-BRANCH BRANCH, node 0 V ; Match this alternative, or the next...
+BRANCH BRANCH, node 1 V ; Match this alternative, or the next...
#*Literals
# NOTE: the relative ordering of these types is important do not change it
@@ -252,7 +252,7 @@ REFFAN REF, num 1 V ; Match already matched string, using /aai rul
#*Support for long RE
LONGJMP LONGJMP, off 1 . 1 ; Jump far away.
-BRANCHJ BRANCHJ, off 1 V 1 ; BRANCH with long offset.
+BRANCHJ BRANCHJ, off 2L V 1 ; BRANCH with long offset.
#*Special Case Regops
IFMATCH BRANCHJ, off 1 . 1 ; Succeeds if the following matches; non-zero flags "f", next_off "o" means lookbehind assertion starting "f..(f-o)" characters before current
diff --git a/regcomp_debug.c b/regcomp_debug.c
index 6410f5e2da..bfa5370662 100644
--- a/regcomp_debug.c
+++ b/regcomp_debug.c
@@ -408,8 +408,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpv(sv, REGNODE_NAME(op)); /* Take off const! */
k = REGNODE_TYPE(op);
-
- if (k == EXACT) {
+ if (op == BRANCH) {
+ Perl_sv_catpvf(aTHX_ sv, " (buf:%" IVdf "/%" IVdf ")", (IV)ARGa(o),(IV)ARGb(o));
+ }
+ else if (op == BRANCHJ) {
+ Perl_sv_catpvf(aTHX_ sv, " (buf:%" IVdf "/%" IVdf ")", (IV)ARG2La(o),(IV)ARG2Lb(o));
+ }
+ else if (k == EXACT) {
sv_catpvs(sv, " ");
/* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
* is a crude hack but it may be the best for now since
@@ -462,6 +467,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
);
sv_catpvs(sv, "]");
}
+ if (trie->before_paren || trie->after_paren)
+ Perl_sv_catpvf(aTHX_ sv, " (buf:%" IVdf "/%" IVdf ")",
+ (IV)trie->before_paren,(IV)trie->after_paren);
} else if (k == CURLY) {
U32 lo = ARG1(o), hi = ARG2(o);
if (ARG3(o) || ARG4(o))
diff --git a/regcomp_trie.c b/regcomp_trie.c
index cf692d0821..8a6b4b639a 100644
--- a/regcomp_trie.c
+++ b/regcomp_trie.c
@@ -469,10 +469,26 @@ is the recommended Unicode-aware way of saying
trie->wordinfo[curword].accept = state; \
\
if ( noper_next < tail ) { \
- if (!trie->jump) \
+ if (!trie->jump) { \
trie->jump = (U16 *) PerlMemShared_calloc( word_count + 1, \
sizeof(U16) ); \
+ trie->j_before_paren = (U16 *) PerlMemShared_calloc( word_count + 1, \
+ sizeof(U16) ); \
+ trie->j_after_paren = (U16 *) PerlMemShared_calloc( word_count + 1, \
+ sizeof(U16) ); \
+ } \
trie->jump[curword] = (U16)(noper_next - convert); \
+ U16 set_before_paren; \
+ U16 set_after_paren; \
+ if (OP(cur) == BRANCH) { \
+ set_before_paren = ARGa(cur); \
+ set_after_paren = ARGb(cur); \
+ } else { \
+ set_before_paren = ARG2La(cur); \
+ set_after_paren = ARG2Lb(cur); \
+ } \
+ trie->j_before_paren[curword] = set_before_paren; \
+ trie->j_after_paren[curword] = set_after_paren; \
if (!jumper) \
jumper = noper_next; \
if (!nextbranch) \
@@ -533,6 +549,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
U32 next_alloc = 0;
regnode *jumper = NULL;
regnode *nextbranch = NULL;
+ regnode *lastbranch = NULL;
regnode *convert = NULL;
U32 *prev_states; /* temp array mapping each state to previous one */
/* we just use folder as a flag in utf8 */
@@ -569,6 +586,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, REGNODE_NAME(flags) );
}
+ /* create the trie struct, all zeroed */
trie = (reg_trie_data *) PerlMemShared_calloc( 1, sizeof(reg_trie_data) );
trie->refcount = 1;
trie->startstate = 1;
@@ -639,6 +657,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
STRLEN maxchars = 0;
bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the
bitmap?*/
+ lastbranch = cur;
if (OP(noper) == NOTHING) {
/* skip past a NOTHING at the start of an alternation
@@ -797,6 +816,13 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
trie->maxlen = maxchars;
}
} /* end first pass */
+ trie->before_paren = OP(first) == BRANCH
+ ? ARGa(first)
+ : ARG2La(first); /* BRANCHJ */
+
+ trie->after_paren = OP(lastbranch) == BRANCH
+ ? ARGb(lastbranch)
+ : ARG2Lb(lastbranch); /* BRANCHJ */
DEBUG_TRIE_COMPILE_r(
Perl_re_indentf( aTHX_
"TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
diff --git a/regexec.c b/regexec.c
index 6d7f52cb33..b112e077d5 100644
--- a/regexec.c
+++ b/regexec.c
@@ -350,6 +350,29 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
rex->lastparen = n; \
rex->lastcloseparen = lcp;
+#define CAPTURE_CLEAR(from_ix, to_ix, str) \
+STMT_START { \
+ U16 my_ix; \
+ if (from_ix) { \
+ for ( my_ix = from_ix; my_ix <= to_ix; my_ix++ ) { \
+ DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_ \
+ "CAPTURE_CLEAR %s \\%" IVdf ": " \
+ "%" IVdf "(%" IVdf ") .. %" IVdf \
+ " => " \
+ "%" IVdf "(%" IVdf ") .. %" IVdf \
+ "\n", \
+ depth, str, (IV)my_ix, \
+ (IV)rex->offs[my_ix].start, \
+ (IV)rex->offs[my_ix].start_tmp, \
+ (IV)rex->offs[my_ix].end, \
+ (IV)-1, (IV)-1, (IV)-1)); \
+ rex->offs[my_ix].start = -1; \
+ rex->offs[my_ix].start_tmp = -1; \
+ rex->offs[my_ix].end = -1; \
+ } \
+ } \
+} STMT_END
+
STATIC void
S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
@@ -6640,6 +6663,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* what trie are we using right now */
reg_trie_data * const trie
= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
+ ST.before_paren = trie->before_paren;
+ ST.after_paren = trie->after_paren;
+ assert(ST.before_paren<=rex->nparens);
+ assert(ST.after_paren<=rex->nparens);
+
HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
U32 state = trie->startstate;
@@ -6689,6 +6717,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
U32 accepted = 0; /* have we seen any accepting states? */
ST.jump = trie->jump;
+ ST.j_before_paren = trie->j_before_paren;
+ ST.j_after_paren= trie->j_after_paren;
ST.me = scan;
ST.firstpos = NULL;
ST.longfold = FALSE; /* char longer if folded => it's harder */
@@ -6800,6 +6830,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
* rest of the branch */
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
+ if (ST.after_paren) {
+ assert(ST.before_paren<=rex->nparens && ST.after_paren<=rex->nparens);
+ CAPTURE_CLEAR(ST.before_paren+1, ST.after_paren,"TRIE_next_fail");
+ }
}
if (!--ST.accepted) {
DEBUG_EXECUTE_r({
@@ -6889,10 +6923,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
uc += chars;
}
}
+ if (ST.jump && ST.jump[ST.nextword]) {
+ scan = ST.me + ST.jump[ST.nextword];
+ ST.before_paren = ST.j_before_paren[ST.nextword];
+ assert(ST.before_paren <= rex->nparens);
+ ST.after_paren = ST.j_after_paren[ST.nextword];
+ assert(ST.after_paren <= rex->nparens);
+ } else {
+ scan = ST.me + NEXT_OFF(ST.me);
+ }
- scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
- ? ST.jump[ST.nextword]
- : NEXT_OFF(ST.me));
DEBUG_EXECUTE_r({
Perl_re_exec_indentf( aTHX_ "%sTRIE matched word #%d, continuing%s\n",
@@ -8957,9 +8997,15 @@ NULL
next = scan + ARG(scan);
if (next == scan)
next = NULL;
- /* FALLTHROUGH */
+ ST.before_paren = ARG2La(scan);
+ ST.after_paren = ARG2Lb(scan);
+ goto branch_logic;
+ NOT_REACHED; /* NOTREACHED */
case BRANCH: /* /(...|A|...)/ */
+ ST.before_paren = ARGa(scan);
+ ST.after_paren = ARGb(scan);
+ branch_logic:
scan = REGNODE_AFTER_opcode(scan,state_num); /* scan now points to inner node */
assert(scan);
ST.lastparen = rex->lastparen;
@@ -9004,6 +9050,7 @@ NULL
}
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
+ CAPTURE_CLEAR(ST.before_paren+1,ST.after_paren,"BRANCH_next_fail");
scan = ST.next_branch;
/* no more branches? */
if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
diff --git a/regexp.h b/regexp.h
index 055475b8b8..ba5d73034d 100644
--- a/regexp.h
+++ b/regexp.h
@@ -831,6 +831,8 @@ typedef struct regmatch_state {
U32 lastparen;
U32 lastcloseparen;
CHECKPOINT cp;
+ U16 before_paren;
+ U16 after_paren;
} branchlike;
@@ -840,6 +842,8 @@ typedef struct regmatch_state {
U32 lastparen;
U32 lastcloseparen;
CHECKPOINT cp;
+ U16 before_paren;
+ U16 after_paren;
regnode *next_branch; /* next branch node */
} branch;
@@ -850,10 +854,14 @@ typedef struct regmatch_state {
U32 lastparen;
U32 lastcloseparen;
CHECKPOINT cp;
+ U16 before_paren;
+ U16 after_paren;
U32 accepted; /* how many accepting states left */
bool longfold;/* saw a fold with a 1->n char mapping */
U16 *jump; /* positive offsets from me */
+ U16 *j_before_paren;
+ U16 *j_after_paren;
regnode *me; /* Which node am I - needed for jump tries*/
U8 *firstpos;/* pos in string of first trie match */
U32 firstchars;/* len in chars of firstpos from start */
diff --git a/regnodes.h b/regnodes.h
index b45c1261f5..321a841989 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -29,8 +29,8 @@ typedef struct regnode tregnode_BOUND;
typedef struct regnode tregnode_BOUNDA;
typedef struct regnode tregnode_BOUNDL;
typedef struct regnode tregnode_BOUNDU;
-typedef struct regnode tregnode_BRANCH;
-typedef struct regnode_1 tregnode_BRANCHJ;
+typedef struct regnode_1 tregnode_BRANCH;
+typedef struct regnode_2L tregnode_BRANCHJ;
typedef struct regnode_1 tregnode_CLOSE;
typedef struct regnode tregnode_CLUMP;
typedef struct regnode_1 tregnode_COMMIT;
@@ -2026,7 +2026,7 @@ EXTCONST struct regnode_meta PL_regnode_info[] = {
{
/* #40 op BRANCH */
.type = BRANCH,
- .arg_len = 0,
+ .arg_len = EXTRA_SIZE(tregnode_BRANCH),
.arg_len_varies = 0,
.off_by_arg = 0
},
diff --git a/t/re/pat.t b/t/re/pat.t
index b837157c42..95070b2290 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -2427,7 +2427,6 @@ SKIP:
}, 'ok', {}, 'gh20826: test regex save stack overflow');
}
{
- local $::TODO = "Not Yet Implemented";
my ($x, $y);
ok( "aaa" =~ /(?:(a)?\1)+/,
"GH Issue #18865 'aaa' - pattern matches");
@@ -2435,26 +2434,33 @@ SKIP:
ok( "aaa" =~ /(?:((?{})a)?\1)+/,
"GH Issue #18865 'aaa' - deoptimized pattern matches");
$y = "($-[0],$+[0])";
- is( $y, $x,
- "GH Issue #18865 'aaa' - test optimization");
-
+ {
+ local $::TODO = "Not Yet Implemented";
+ is( $y, $x,
+ "GH Issue #18865 'aaa' - test optimization");
+ }
ok( "ababab" =~ /(?:(?:(ab))?\1)+/,
"GH Issue #18865 'ababab' - pattern matches");
$x = "($-[0],$+[0])";
ok( "ababab" =~ /(?:(?:((?{})ab))?\1)+/,
"GH Issue #18865 'ababab' - deoptimized pattern matches");
$y = "($-[0],$+[0])";
- is( $y, $x,
- "GH Issue #18865 'ababab' - test optimization");
-
+ {
+ local $::TODO = "Not Yet Implemented";
+ is( $y, $x,
+ "GH Issue #18865 'ababab' - test optimization");
+ }
ok( "XaaXbbXb" =~ /(?:X([ab])?\1)+/,
"GH Issue #18865 'XaaXbbXb' - pattern matches");
$x = "($-[0],$+[0])";
ok( "XaaXbbXb" =~ /(?:X((?{})[ab])?\1)+/,
"GH Issue #18865 'XaaXbbXb' - deoptimized pattern matches");
$y = "($-[0],$+[0])";
- is( $y, $x,
- "GH Issue #18865 'XaaXbbXb' - test optimization");
+ {
+ local $::TODO = "Not Yet Implemented";
+ is( $y, $x,
+ "GH Issue #18865 'XaaXbbXb' - test optimization");
+ }
}
} # End of sub run_tests
diff --git a/t/re/re_tests b/t/re/re_tests
index 7379a39787..bfccaa04ed 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -478,7 +478,7 @@ a(?:b|c|d)+(.) acdbcdbe y $1 e
a(?:b|c|d){2}(.) acdbcdbe y $1 b
a(?:b|c|d){4,5}(.) acdbcdbe y $1 b
a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d
-((foo)|(bar))* foobar Ty $1-$2-$3 bar--bar # was bar-foo-bar prior to 5.37.7
+((foo)|(bar))* foobar y $1-$2-$3 bar--bar # was bar-foo-bar prior to 5.37.7
:(?: - c - Sequence (? incomplete
a(?:b|c|d){6,7}(.) acdbcdbe y $1 e
a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e
@@ -501,7 +501,7 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce
((a{4})+) aaaaaaaaa y $1 aaaaaaaa
(((aa){2})+) aaaaaaaaaa y $1 aaaaaaaa
(((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa
-(?:(f)(o)(o)|(b)(a)(r))* foobar Ty $1:$2:$3:$4:$5:$6 :::b:a:r
+(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 :::b:a:r
(?<=a)b ab y $& b
(?<=af?)b ab y $& b
(?<=a)b cb n - -
@@ -964,8 +964,8 @@ tt+$ xxxtt y - -
(?i) y - -
(?a:((?u)\w)\W) \xC0\xC0 y $& \xC0\xC0
'(?!\A)x'm a\nxb\n y - -
-^(a(b)?)+$ aba y -$1-$2- -a-- # !normal
-^(aa(bb)?)+$ aabbaa y -$1-$2- -aa-- # !normal
+^(a(b)?)+$ aba y -$1-$2- -a--
+^(aa(bb)?)+$ aabbaa y -$1-$2- -aa--
'^.{9}abc.*\n'm 123\nabcabcabcabc\n y - -
^(a)?a$ a y -$1- --
^(a)?(?(1)a|b)+$ a n - -
@@ -2139,7 +2139,8 @@ AB\s+\x{100} AB \x{100}X y - -
/(([ab]+)|([cd]+)|([ef]+))+/ ace y $1-$2-$3-$4=$& e---e=ace
/(([ab]+)|([cd]+)|([ef]+))+/ aceb Ty $1-$2-$3-$4=$& b-b--=aceb
/(([ab]+)|([cd]+)|([ef]+))+/ acebd Ty $1-$2-$3-$4=$& d--d-=acebd
-/(([ab]+)|([cd]+)|([ef]+))+/ acebdf Ty $1-$2-$3-$4=$& f---f=acebdf
+/(([ab]+)|([cd]+)|([ef]+))+/ acebdf y $1-$2-$3-$4=$& f---f=acebdf
+/((a)(b)(c)|(a)(b)|(a))+/ abcaba y $1+$2-$3-$4+$5-$6+$7=$& a+--+-+a=abcaba
# Keep these lines at the end of the file
# pat string y/n/etc expr expected-expr skip-reason comment
# vim: softtabstop=0 noexpandtab