diff options
author | Yves Orton <demerphq@gmail.com> | 2023-01-09 22:34:13 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2023-03-13 21:26:08 +0800 |
commit | acababb42be12ff2986b73c1bfa963b70bb5d54e (patch) | |
tree | dc8cc4980e6fe3de0c686cc641dbbe37d1e8e961 /regcomp.c | |
parent | 05b13cf680588a26de64f13d2b3be385e17624bc (diff) | |
download | perl-acababb42be12ff2986b73c1bfa963b70bb5d54e.tar.gz |
regexec.c - teach BRANCH and BRANCHJ nodes to reset capture buffers
In /((a)(b)|(a))+/ we should not end up with $2 and $4 being set at
the same time. When a branch fails it should reset any capture buffers
that might be touched by its branch.
We change BRANCH and BRANCHJ to store the number of parens before the
branch, and the number of parens after the branch was completed. When
a BRANCH operation fails, we clear the buffers it contains before we
continue on.
It is a bit more complex than it should be because we have BRANCHJ
and BRANCH. (One of these days we should merge them together.)
This is also made somewhat more complex because TRIE nodes are actually
branches, and may need to track capture buffers also, at two levels.
The overall TRIE op, and for jump tries especially where we emulate
the behavior of branches. So we have to do the same clearing logic if
a trie branch fails as well.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 46 |
1 files changed, 42 insertions, 4 deletions
@@ -4084,6 +4084,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) parse_rest: /* Pick up the branches, linking them together. */ segment_parse_start = RExC_parse; + I32 npar_before_regbranch = RExC_npar - 1; br = regbranch(pRExC_state, &flags, 1, depth+1); /* branch_len = (paren != 0); */ @@ -4095,9 +4096,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (*RExC_parse == '|') { if (RExC_use_BRANCHJ) { reginsert(pRExC_state, BRANCHJ, br, depth+1); + ARG2La_SET(REGNODE_p(br), npar_before_regbranch); + ARG2Lb_SET(REGNODE_p(br), (U16)RExC_npar - 1); } else { reginsert(pRExC_state, BRANCH, br, depth+1); + ARGa_SET(REGNODE_p(br), (U16)npar_before_regbranch); + ARGb_SET(REGNODE_p(br), (U16)RExC_npar - 1); } have_branch = 1; } @@ -4140,6 +4145,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */ REQUIRE_BRANCHJ(flagp, 0); } + assert(OP(REGNODE_p(br)) == BRANCH || OP(REGNODE_p(br))==BRANCHJ); + assert(OP(REGNODE_p(lastbr)) == BRANCH || OP(REGNODE_p(lastbr))==BRANCHJ); + if (OP(REGNODE_p(br)) == BRANCH) { + if (OP(REGNODE_p(lastbr)) == BRANCH) + ARGb_SET(REGNODE_p(lastbr),ARGa(REGNODE_p(br))); + else + ARG2Lb_SET(REGNODE_p(lastbr),ARGa(REGNODE_p(br))); + } + else + if (OP(REGNODE_p(br)) == BRANCHJ) { + if (OP(REGNODE_p(lastbr)) == BRANCH) + ARGb_SET(REGNODE_p(lastbr),ARG2La(REGNODE_p(br))); + else + ARG2Lb_SET(REGNODE_p(lastbr),ARG2La(REGNODE_p(br))); + } + lastbr = br; *flagp |= flags & (HASWIDTH | POSTPONED); } @@ -4213,6 +4234,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) (IV)(ender - lastbr) ); }); + if (OP(REGNODE_p(lastbr)) == BRANCH) { + ARGb_SET(REGNODE_p(lastbr),(U16)RExC_npar-1); + } + else + if (OP(REGNODE_p(lastbr)) == BRANCHJ) { + ARG2Lb_SET(REGNODE_p(lastbr),(U16)RExC_npar-1); + } + if (! REGTAIL(pRExC_state, lastbr, ender)) { REQUIRE_BRANCHJ(flagp, 0); } @@ -4356,6 +4385,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) regnode_offset ret; regnode_offset chain = 0; regnode_offset latest; + regnode *branch_node = NULL; I32 flags = 0, c = 0; DECLARE_AND_GET_RE_DEBUG_FLAGS; @@ -4366,10 +4396,14 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) if (first) ret = 0; else { - if (RExC_use_BRANCHJ) - ret = reganode(pRExC_state, BRANCHJ, 0); - else { - ret = reg_node(pRExC_state, BRANCH); + if (RExC_use_BRANCHJ) { + ret = reg2Lanode(pRExC_state, BRANCHJ, 0, 0); + branch_node = REGNODE_p(ret); + ARG2La_SET(branch_node, (U16)RExC_npar-1); + } else { + ret = reganode(pRExC_state, BRANCH, 0); + branch_node = REGNODE_p(ret); + ARGa_SET(branch_node, (U16)RExC_npar-1); } } @@ -13351,6 +13385,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) PerlMemShared_free(trie->bitmap); if (trie->jump) PerlMemShared_free(trie->jump); + if (trie->j_before_paren) + PerlMemShared_free(trie->j_before_paren); + if (trie->j_after_paren) + PerlMemShared_free(trie->j_after_paren); PerlMemShared_free(trie->wordinfo); /* do this last!!!! */ PerlMemShared_free(ri->data->data[n]); |