diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2021-02-05 17:08:10 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2021-02-05 17:25:00 -0800 |
commit | 70b673eb768eb7288639cbbe4642c2912b7d204e (patch) | |
tree | c36664480e6d05be460346e5fc1abbcd2fee8eb9 /lib/regexec.c | |
parent | f5596242f91109b393919653d091bbeecc4b0a55 (diff) | |
download | gnulib-70b673eb768eb7288639cbbe4642c2912b7d204e.tar.gz |
regex: fix longstanding backref match bug
This fixes a longstanding glibc bug concerning backreferences
<https://sourceware.org/11053> (2009-12-04).
* lib/regexec.c (proceed_next_node, push_fail_stack)
(pop_fail_stack): Push and pop the previous registers
as well as the current ones. All callers changed.
(set_regs): Also pop if CUR_NODE has already been checked,
so that it does not get added as a duplicate set entry.
(update_regs): Fix comment location.
* tests/test-regex.c (tests): New constant.
(bug_regex11): New test function.
(main): Bump alarm value. Call new test function.
Diffstat (limited to 'lib/regexec.c')
-rw-r--r-- | lib/regexec.c | 26 |
1 files changed, 17 insertions, 9 deletions
diff --git a/lib/regexec.c b/lib/regexec.c index fdd2e373e7..424bc8d158 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -59,7 +59,7 @@ static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, Idx cur_idx, Idx nmatch); static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, Idx nregs, - regmatch_t *regs, + regmatch_t *regs, regmatch_t *prevregs, re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, @@ -1211,6 +1211,7 @@ check_halt_state_context (const re_match_context_t *mctx, static Idx proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, + regmatch_t *prevregs, Idx *pidx, Idx node, re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) { @@ -1243,7 +1244,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, /* Otherwise, push the second epsilon-transition on the fail stack. */ else if (fs != NULL && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) + prevregs, eps_via_nodes)) return -2; /* We know we are going to exit. */ @@ -1316,7 +1317,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, static reg_errcode_t __attribute_warn_unused_result__ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, - Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) + Idx nregs, regmatch_t *regs, regmatch_t *prevregs, + re_node_set *eps_via_nodes) { reg_errcode_t err; Idx num = fs->num++; @@ -1332,23 +1334,26 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, } fs->stack[num].idx = str_idx; fs->stack[num].node = dest_node; - fs->stack[num].regs = re_malloc (regmatch_t, nregs); + fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs); if (fs->stack[num].regs == NULL) return REG_ESPACE; memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); + memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs); err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err; } static Idx pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, - regmatch_t *regs, re_node_set *eps_via_nodes) + regmatch_t *regs, regmatch_t *prevregs, + re_node_set *eps_via_nodes) { if (fs == NULL || fs->num == 0) return -1; Idx num = --fs->num; *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); + memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs); re_node_set_free (eps_via_nodes); re_free (fs->stack[num].regs); *eps_via_nodes = fs->stack[num].eps_via_nodes; @@ -1408,7 +1413,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, { update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); - if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node) + || re_node_set_contains (&eps_via_nodes, cur_node)) { Idx reg_idx; cur_node = -1; @@ -1418,7 +1424,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) { cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); + prev_idx_match, &eps_via_nodes); break; } } @@ -1431,7 +1437,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, } /* Proceed to next node. */ - cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, + cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match, + &idx, cur_node, &eps_via_nodes, fs); if (__glibc_unlikely (cur_node < 0)) @@ -1443,7 +1450,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, free_fail_stack_return (fs); return REG_ESPACE; } - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, &eps_via_nodes); + cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, + prev_idx_match, &eps_via_nodes); if (cur_node < 0) { re_node_set_free (&eps_via_nodes); |