summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- pcre_compile.c 19
-rw-r--r-- pcre_exec.c 281
-rw-r--r-- pcre_internal.h 9
-rw-r--r-- testdata/testinput1 18
-rw-r--r-- testdata/testinput2 6
-rw-r--r-- testdata/testoutput1 31
-rw-r--r-- testdata/testoutput2 13
8 files changed, 231 insertions, 153 deletions
diff --git a/ChangeLog b/ChangeLog
index 7918318..f73b8f3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -126,6 +126,13 @@ Version 8.13 30-Apr-2011
23. Add the ++ qualifier feature to pcretest, to show the remainder of the
subject after a captured substring (to make it easier to tell which of a
number of identical substrings has been captured).
+
+24. The way atomic groups are processed by pcre_exec() has been changed so that
+ if they are repeated, backtracking one repetition now resets captured
+ values correctly. For example, if ((?>(a+)b)+aabab) is matched against
+ "aaaabaaabaabab" the value of captured group 2 is now correctly recorded as
+ "aaa". Previously, it would have been "a". As part of this code
+ refactoring, the way recursive calls are handled has also been changed.
Version 8.12 15-Jan-2011
diff --git a/pcre_compile.c b/pcre_compile.c
index 6d043c7..b738dfe 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1694,6 +1694,7 @@ _pcre_find_bracket(const uschar *code, BOOL utf8, int number)
for (;;)
{
register int c = *code;
+
if (c == OP_END) return NULL;
/* XCLASS is used for classes that cannot be represented just by a bit
@@ -4726,7 +4727,14 @@ for (;; ptr++)
}
/* If the maximum is unlimited, set a repeater in the final copy. For
- ONCE brackets, that's all we need to do.
+ ONCE brackets, that's all we need to do.
+
+ (To be done next, after recursion adjusted)
+ However, possessively repeated
+ ONCE brackets can be converted into non-capturing brackets, as the
+ behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
+ deal with possessive ONCEs specially.
+ (....)
Otherwise, if the quantifier was possessive, we convert the BRA code to
the POS form, and the KET code to KETRPOS. (It turns out to be convenient
@@ -4748,7 +4756,12 @@ for (;; ptr++)
uschar *ketcode = code - 1 - LINK_SIZE;
uschar *bracode = ketcode - GET(ketcode, 1);
- if (*bracode == OP_ONCE)
+/****
+ if (*bracode == OP_ONCE && possessive_quantifier)
+ *bracode = OP_BRA;
+****/
+
+ if (*bracode == OP_ONCE)
*ketcode = OP_KETRMAX + repeat_type;
else
{
@@ -5685,7 +5698,7 @@ for (;; ptr++)
/* Insert the recursion/subroutine item, automatically wrapped inside
"once" brackets. Set up a "previous group" length so that a
subsequent quantifier will work. */
-
+
*code = OP_ONCE;
PUT(code, 1, 2 + 2*LINK_SIZE);
code += 1 + LINK_SIZE;
diff --git a/pcre_exec.c b/pcre_exec.c
index bb261d1..cbbff84 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -76,10 +76,11 @@ negative to avoid the external error codes. */
#define MATCH_ACCEPT (-999)
#define MATCH_COMMIT (-998)
#define MATCH_KETRPOS (-997)
-#define MATCH_PRUNE (-996)
-#define MATCH_SKIP (-995)
-#define MATCH_SKIP_ARG (-994)
-#define MATCH_THEN (-993)
+#define MATCH_ONCE (-996)
+#define MATCH_PRUNE (-995)
+#define MATCH_SKIP (-994)
+#define MATCH_SKIP_ARG (-993)
+#define MATCH_THEN (-992)
/* This is a convenience macro for code that occurs many times. */
@@ -276,7 +277,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63};
+ RM61, RM62, RM63, RM64, RM65, RM66 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -845,6 +846,7 @@ for (;;)
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
eptrb, RM1);
+ if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
if (rrc != MATCH_NOMATCH &&
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
RRETURN(rrc);
@@ -854,13 +856,15 @@ for (;;)
}
DPRINTF(("bracket %d failed\n", number));
-
md->offset_vector[offset] = save_offset1;
md->offset_vector[offset+1] = save_offset2;
md->offset_vector[md->offset_end - number] = save_offset3;
+
+ /* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or
+ MATCH_THEN. */
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
- RRETURN(MATCH_NOMATCH);
+ RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH));
}
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
@@ -874,29 +878,49 @@ for (;;)
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
- /* Non-capturing bracket, except for possessive with unlimited repeat. Loop
- for all the alternatives. When we get to the final alternative within the
- brackets, we used to return the result of a recursive call to match()
- whatever happened so it was possible to reduce stack usage by turning this
- into a tail recursion, except in the case of a possibly empty group.
- However, now that there is the possiblity of (*THEN) occurring in the final
- alternative, this optimization is no longer possible. */
+ /* Non-capturing or atomic group, except for possessive with unlimited
+ repeat. Loop for all the alternatives. When we get to the final alternative
+ within the brackets, we used to return the result of a recursive call to
+ match() whatever happened so it was possible to reduce stack usage by
+ turning this into a tail recursion, except in the case of a possibly empty
+ group. However, now that there is the possibility of (*THEN) occurring in
+ the final alternative, this optimization is no longer possible.
+
+ MATCH_ONCE is returned when the end of an atomic group is successfully
+ reached, but subsequent matching fails. It passes back up the tree (causing
+ captured values to be reset) until the original atomic group level is
+ reached. This is tested by comparing md->once_target with the start of the
+ group. At this point, the return is converted into MATCH_NOMATCH so that
+ previous backup points can be taken. */
+ case OP_ONCE:
case OP_BRA:
case OP_SBRA:
DPRINTF(("start non-capturing bracket\n"));
+
for (;;)
{
- if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
+ if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
RM2);
if (rrc != MATCH_NOMATCH &&
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
+ {
+ if (rrc == MATCH_ONCE)
+ {
+ const uschar *scode = ecode;
+ if (*scode != OP_ONCE) /* If not at start, find it */
+ {
+ while (*scode == OP_ALT) scode += GET(scode, 1);
+ scode -= GET(scode, 1);
+ }
+ if (md->once_target == scode) rrc = MATCH_NOMATCH;
+ }
RRETURN(rrc);
+ }
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
}
-
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
RRETURN(MATCH_NOMATCH);
@@ -1299,10 +1323,14 @@ for (;;)
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
case OP_END:
+
+/*
if (md->recursive != NULL)
{
recursion_info *rec = md->recursive;
+
md->recursive = rec->prevrec;
+
memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
offset_top = rec->save_offset_top;
@@ -1312,13 +1340,16 @@ for (;;)
break;
}
}
-
+*/
/* Otherwise, if we have matched an empty string, fail if not in an
assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
is set and we have matched at the start of the subject. In both cases,
backtracking will then try other alternatives, if any. */
- else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
+/* else */ if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
+
+ md->recursive == NULL &&
+
(md->notempty ||
(md->notempty_atstart &&
mstart == md->start_subject + md->start_offset)))
@@ -1481,19 +1512,19 @@ for (;;)
/* Recursion either matches the current regex, or some subexpression. The
offset data is the offset to the starting bracket from the start of the
whole pattern. (This is so that it works from duplicated subpatterns.)
-
- If there are any capturing brackets started but not finished, we have to
- save their starting points and reinstate them after the recursion. However,
- we don't know how many such there are (offset_top records the completed
- total) so we just have to save all the potential data. There may be up to
- 65535 such values, which is too large to put on the stack, but using malloc
- for small numbers seems expensive. As a compromise, the stack is used when
- there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
- is used.
+
+ The state of the capturing groups is preserved over recursion, and
+ re-instated afterwards. We don't know how many are started and not yet
+ finished (offset_top records the completed total) so we just have to save
+ all the potential data. There may be up to 65535 such values, which is too
+ large to put on the stack, but using malloc for small numbers seems
+ expensive. As a compromise, the stack is used when there are no more than
+ REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
There are also other values that have to be saved. We use a chained
sequence of blocks that actually live on the stack. Thanks to Robin Houston
- for the original version of this logic. */
+ for the original version of this logic. It has, however, been hacked around
+ a lot, so he is not to blame for the current way it works. */
case OP_RECURSE:
{
@@ -1506,12 +1537,11 @@ for (;;)
new_recursive.prevrec = md->recursive;
md->recursive = &new_recursive;
- /* Find where to continue from afterwards */
+ /* Where to continue from afterwards */
ecode += 1 + LINK_SIZE;
- new_recursive.after_call = ecode;
- /* Now save the offset data. */
+ /* Now save the offset data */
new_recursive.saved_max = md->offset_end;
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
@@ -1522,13 +1552,12 @@ for (;;)
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
}
-
memcpy(new_recursive.offset_save, md->offset_vector,
new_recursive.saved_max * sizeof(int));
- new_recursive.save_offset_top = offset_top;
- /* OK, now we can do the recursion. For each top-level alternative we
- restore the offset and recursion data. */
+ /* OK, now we can do the recursion. After processing each alternative,
+ restore the offset data. If there were nested recursions, md->recursive
+ might be changed, so reset it before looping. */
DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
cbegroup = (*callpat >= OP_SBRA);
@@ -1537,13 +1566,22 @@ for (;;)
if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
md, eptrb, RM6);
+ memcpy(md->offset_vector, new_recursive.offset_save,
+ new_recursive.saved_max * sizeof(int));
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
DPRINTF(("Recursion matched\n"));
md->recursive = new_recursive.prevrec;
if (new_recursive.offset_save != stacksave)
(pcre_free)(new_recursive.offset_save);
- MRRETURN(MATCH_MATCH);
+
+ /* Set where we got to in the subject, and reset the start in case
+ it was changed by \K. This *is* propagated back out of a recursion,
+ for Perl compatibility. */
+
+ eptr = md->end_match_ptr;
+ mstart = md->start_match_ptr;
+ goto RECURSION_MATCHED; /* Exit loop; end processing */
}
else if (rrc != MATCH_NOMATCH &&
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
@@ -1555,8 +1593,6 @@ for (;;)
}
md->recursive = &new_recursive;
- memcpy(md->offset_vector, new_recursive.offset_save,
- new_recursive.saved_max * sizeof(int));
callpat += GET(callpat, 1);
}
while (*callpat == OP_ALT);
@@ -1567,78 +1603,9 @@ for (;;)
(pcre_free)(new_recursive.offset_save);
MRRETURN(MATCH_NOMATCH);
}
- /* Control never reaches here */
-
- /* "Once" brackets are like assertion brackets except that after a match,
- the point in the subject string is not moved back. Thus there can never be
- a move back into the brackets. Friedl calls these "atomic" subpatterns.
- Check the alternative branches in turn - the matching won't pass the KET
- for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer, but resetting
- the start-of-match value in case it was changed by \K. */
-
- case OP_ONCE:
- prev = ecode;
- saved_eptr = eptr;
-
- do
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
- if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
- {
- mstart = md->start_match_ptr;
- break;
- }
- if (rrc != MATCH_NOMATCH &&
- (rrc != MATCH_THEN || md->start_match_ptr != ecode))
- RRETURN(rrc);
- ecode += GET(ecode,1);
- }
- while (*ecode == OP_ALT);
-
- /* If hit the end of the group (which could be repeated), fail */
-
- if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
-
- /* Continue after the group, updating the offsets high water mark, since
- extracts may have been taken. */
-
- do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
-
- offset_top = md->end_offset_top;
- eptr = md->end_match_ptr;
-
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. */
-
- if (*ecode == OP_KET || eptr == saved_eptr)
- {
- ecode += 1+LINK_SIZE;
- break;
- }
-
- /* The repeating kets try the rest of the pattern or restart from the
- preceding bracket, in the appropriate order. The second "call" of match()
- uses tail recursion, to avoid using another stack frame. */
-
- if (*ecode == OP_KETRMIN)
- {
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode = prev;
- }
- else /* OP_KETRMAX */
- {
- md->match_function_type = MATCH_CBEGROUP;
- RMATCH(eptr, prev, offset_top, md, eptrb, RM9);
- if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- ecode += 1 + LINK_SIZE;
- }
- goto TAIL_RECURSE;
-
- /* Control never gets here */
+
+ RECURSION_MATCHED:
+ break;
/* An alternation is the end of a branch; scan along to find the end of the
bracketed group and go to there. */
@@ -1691,26 +1658,25 @@ for (;;)
case OP_KETRMAX:
case OP_KETRPOS:
prev = ecode - GET(ecode, 1);
-
+
/* If this was a group that remembered the subject start, in order to break
infinite repeats of empty string matches, retrieve the subject start from
the chain. Otherwise, set it NULL. */
- if (*prev >= OP_SBRA)
+ if (*prev >= OP_SBRA || *prev == OP_ONCE)
{
saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
eptrb = eptrb->epb_prev; /* Backup to previous group */
}
else saved_eptr = NULL;
- /* If we are at the end of an assertion group or an atomic group, stop
- matching and return MATCH_MATCH, but record the current high water mark for
- use by positive assertions. We also need to record the match start in case
- it was changed by \K. */
+ /* If we are at the end of an assertion group, stop matching and return
+ MATCH_MATCH, but record the current high water mark for use by positive
+ assertions. We also need to record the match start in case it was changed
+ by \K. */
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
- *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
- *prev == OP_ONCE)
+ *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT)
{
md->end_match_ptr = eptr; /* For ONCE */
md->end_offset_top = offset_top;
@@ -1720,9 +1686,11 @@ for (;;)
/* For capturing groups we have to check the group number back at the start
and if necessary complete handling an extraction by setting the offsets and
- bumping the high water mark. Note that whole-pattern recursion is coded as
- a recurse into group 0, so it won't be picked up here. Instead, we catch it
- when the OP_END is reached. Other recursion is handled here. */
+ bumping the high water mark. Whole-pattern recursion is coded as a recurse
+ into group 0, so it won't be picked up here. Instead, we catch it when the
+ OP_END is reached. Other recursion is handled here. We just have to record
+ the current subject position and start match pointer and give a MATCH
+ return. */
if (*prev == OP_CBRA || *prev == OP_SCBRA ||
*prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
@@ -1735,6 +1703,17 @@ for (;;)
printf("\n");
#endif
+ /* Handle a recursively called group. */
+
+ if (md->recursive != NULL && md->recursive->group_num == number)
+ {
+ md->end_match_ptr = eptr;
+ md->start_match_ptr = mstart;
+ RRETURN(MATCH_MATCH);
+ }
+
+ /* Deal with capturing */
+
md->capture_last = number;
if (offset >= md->offset_max) md->offset_overflow = TRUE; else
{
@@ -1762,32 +1741,27 @@ for (;;)
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
if (offset_top <= offset) offset_top = offset + 2;
}
-
- /* Handle a recursively called group. Restore the offsets
- appropriately and continue from after the call. */
-
- if (md->recursive != NULL && md->recursive->group_num == number)
- {
- recursion_info *rec = md->recursive;
- DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
- md->recursive = rec->prevrec;
- memcpy(md->offset_vector, rec->offset_save,
- rec->saved_max * sizeof(int));
- offset_top = rec->save_offset_top;
- ecode = rec->after_call;
- break;
- }
}
- /* For a non-repeating ket, just continue at this level. This also
- happens for a repeating ket if no characters were matched in the group.
- This is the forcible breaking of infinite loops as implemented in Perl
- 5.005. If there is an options reset, it will get obeyed in the normal
- course of events. */
+ /* For an ordinary non-repeating ket, just continue at this level. This
+ also happens for a repeating ket if no characters were matched in the
+ group. This is the forcible breaking of infinite loops as implemented in
+ Perl 5.005. For a non-repeating atomic group, establish a backup point by
+ processing the rest of the pattern at a lower level. If this results in a
+ NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby
+ bypassing intermediate backup points, but resetting any captures that
+ happened along the way. */
if (*ecode == OP_KET || eptr == saved_eptr)
{
- ecode += 1 + LINK_SIZE;
+ if (*prev == OP_ONCE)
+ {
+ RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
+ RRETURN(MATCH_ONCE);
+ }
+ ecode += 1 + LINK_SIZE; /* Carry on at this level */
break;
}
@@ -1805,12 +1779,20 @@ for (;;)
/* The normal repeating kets try the rest of the pattern or restart from
the preceding bracket, in the appropriate order. In the second case, we can
use tail recursion to avoid using another stack frame, unless we have an
- unlimited repeat of a group that can match an empty string. */
+ atomic group or an unlimited repeat of a group that can match an empty
+ string. */
if (*ecode == OP_KETRMIN)
{
- RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
+ RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (*prev == OP_ONCE)
+ {
+ RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
+ RRETURN(MATCH_ONCE);
+ }
if (*prev >= OP_SBRA) /* Could match an empty string */
{
md->match_function_type = MATCH_CBEGROUP;
@@ -1824,7 +1806,15 @@ for (;;)
{
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
+ if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (*prev == OP_ONCE)
+ {
+ RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ md->once_target = prev;
+ RRETURN(MATCH_ONCE);
+ }
ecode += 1 + LINK_SIZE;
goto TAIL_RECURSE;
}
@@ -5707,7 +5697,8 @@ switch (frame->Xwhere)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
- LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
+ LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
+ LBL(65) LBL(66)
#ifdef SUPPORT_UTF8
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
LBL(32) LBL(34) LBL(42) LBL(46)
diff --git a/pcre_internal.h b/pcre_internal.h
index d427cd0..207e242 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1765,11 +1765,9 @@ call within the pattern. */
typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
- int group_num; /* Number of group that was called */
- const uschar *after_call; /* "Return value": points after the call in the expr */
- int *offset_save; /* Pointer to start of saved offsets */
- int saved_max; /* Number of saved offsets */
- int save_offset_top; /* Current value of offset_top */
+ int group_num; /* Number of group that was called */
+ int *offset_save; /* Pointer to start of saved offsets */
+ int saved_max; /* Number of saved offsets */
} recursion_info;
/* Structure for building a chain of data for holding the values of the subject
@@ -1827,6 +1825,7 @@ typedef struct match_data {
recursion_info *recursive; /* Linked list of recursion data */
void *callout_data; /* To pass back to callouts */
const uschar *mark; /* Mark pointer to pass back */
+ const uschar *once_target; /* Where to back up to for atomic groups */
} match_data;
/* A similar structure is used for the same purpose by the DFA matching
diff --git a/testdata/testinput1 b/testdata/testinput1
index 29489c2..d3dd7ad 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -4157,4 +4157,22 @@
/(?>(?>(a))b|(a)c)/
ac
+/(?:(?>([ab])))+a=/+
+ =ba=
+
+/(?>([ab]))+a=/+
+ =ba=
+
+/((?>(a+)b)+(aabab))/
+ aaaabaaabaabab
+
+/(?>a+|ab)+?c/
+ aabc
+
+/(?>a+|ab)+c/
+ aabc
+
+/(?:a+|ab)+c/
+ aabc
+
/-- End of testinput1 --/
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9e3d9a5..489f85a 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3757,4 +3757,10 @@ with \Y. ---/
c
c\N
+/(?:(?>(a)))+a%/++
+ %aa%
+
+/(a)b|ac/++
+ ac\O3
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 6804064..8179518 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -6801,4 +6801,35 @@ No match
1: <unset>
2: a
+/(?:(?>([ab])))+a=/+
+ =ba=
+ 0: ba=
+ 0+
+ 1: b
+
+/(?>([ab]))+a=/+
+ =ba=
+ 0: ba=
+ 0+
+ 1: b
+
+/((?>(a+)b)+(aabab))/
+ aaaabaaabaabab
+ 0: aaaabaaabaabab
+ 1: aaaabaaabaabab
+ 2: aaa
+ 3: aabab
+
+/(?>a+|ab)+?c/
+ aabc
+No match
+
+/(?>a+|ab)+c/
+ aabc
+No match
+
+/(?:a+|ab)+c/
+ aabc
+ 0: aabc
+
/-- End of testinput1 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index c6fa025..5618368 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -11895,4 +11895,17 @@ No match
c\N
No match
+/(?:(?>(a)))+a%/++
+ %aa%
+ 0: aa%
+ 0+
+ 1: a
+ 1+ a%
+
+/(a)b|ac/++
+ ac\O3
+Matched, but too many substrings
+ 0: ac
+ 0+
+
/-- End of testinput2 --/