diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-06 19:00:29 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-06 19:00:29 +0000 |
commit | 2878ed98d792c02f7c9f7b4832016f55ad1db1ee (patch) | |
tree | 4e353f00244bc898213a46fc21c8a066dbf1ff50 | |
parent | 83b2b44d38f1afd36c4b19e1afceea68e6216fbd (diff) | |
download | pcre-2878ed98d792c02f7c9f7b4832016f55ad1db1ee.tar.gz |
Fix bugs with \K in atomic groups, subroutines, and assertions.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@500 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | doc/pcrepattern.3 | 6 | ||||
-rw-r--r-- | pcre_compile.c | 1 | ||||
-rw-r--r-- | pcre_exec.c | 27 | ||||
-rw-r--r-- | pcre_internal.h | 1 | ||||
-rw-r--r-- | testdata/testinput11 | 22 | ||||
-rw-r--r-- | testdata/testinput2 | 22 | ||||
-rw-r--r-- | testdata/testoutput11 | 34 | ||||
-rw-r--r-- | testdata/testoutput2 | 28 |
9 files changed, 136 insertions, 12 deletions
@@ -38,6 +38,13 @@ Version 8.02 01-Mar-2010 counting zeros. There was no check for running off the end of the string, which could happen if a new error number was added without updating the string. + +10. \K gave a compile-time error if it appeared in a lookbehind assertion. + +11. \K was not working if it appeared in an atomic group or in a group that + was called as a "subroutine", or in an assertion. Perl 5.11 documents that + \K is "not well defined" if used in an assertion. PCRE now accepts it if + the assertion is positive, but not if it is negative. Version 8.01 19-Jan-2010 diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index c5f20eb..8849872 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -737,6 +737,10 @@ For example, when the pattern (foo)\eKbar .sp matches "foobar", the first substring is still set to "foo". +.P +Perl documents that the use of \eK within assertions is "not well defined". In +PCRE, \eK is acted upon when it occurs inside positive assertions, but is +ignored in negative assertions. . . .\" HTML <a name="smallassertions"></a> @@ -2453,6 +2457,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 01 March 2010 +Last updated: 06 March 2010 Copyright (c) 1997-2010 University of Cambridge. 
.fi diff --git a/pcre_compile.c b/pcre_compile.c index e89819a..b9cc701 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -1450,6 +1450,7 @@ for (;;) case OP_CALLOUT: case OP_SOD: case OP_SOM: + case OP_SET_SOM: case OP_EOD: case OP_EODN: case OP_CIRC: diff --git a/pcre_exec.c b/pcre_exec.c index c3bb970..2734723 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -1070,7 +1070,6 @@ for (;;) memmove(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); offset_top = rec->save_offset_top; - mstart = rec->save_start; ims = original_ims; ecode = rec->after_call; break; @@ -1114,7 +1113,11 @@ for (;;) { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0, RM4); - if (rrc == MATCH_MATCH) break; + if (rrc == MATCH_MATCH) + { + mstart = md->start_match_ptr; /* In case \K reset it */ + break; + } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode, 1); } @@ -1267,9 +1270,7 @@ for (;;) memcpy(new_recursive.offset_save, md->offset_vector, new_recursive.saved_max * sizeof(int)); - new_recursive.save_start = mstart; new_recursive.save_offset_top = offset_top; - mstart = eptr; /* OK, now we can do the recursion. For each top-level alternative we restore the offset and recursion data. */ @@ -1316,7 +1317,8 @@ for (;;) a move back into the brackets. Friedl calls these "atomic" subpatterns. Check the alternative branches in turn - the matching won't pass the KET for this kind of subpattern. If any one branch matches, we carry on as at - the end of a normal bracket, leaving the subject pointer. */ + the end of a normal bracket, leaving the subject pointer, but resetting + the start-of-match value in case it was changed by \K. 
*/ case OP_ONCE: prev = ecode; @@ -1325,7 +1327,11 @@ for (;;) do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7); - if (rrc == MATCH_MATCH) break; + if (rrc == MATCH_MATCH) + { + mstart = md->start_match_ptr; + break; + } if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode,1); } @@ -1444,9 +1450,10 @@ for (;;) } else saved_eptr = NULL; - /* If we are at the end of an assertion group, stop matching and return - MATCH_MATCH, but record the current high water mark for use by positive - assertions. Do this also for the "once" (atomic) groups. */ + /* If we are at the end of an assertion group or an atomic group, stop + matching and return MATCH_MATCH, but record the current high water mark for + use by positive assertions. We also need to record the match start in case + it was changed by \K. */ if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || @@ -1454,6 +1461,7 @@ for (;;) { md->end_match_ptr = eptr; /* For ONCE */ md->end_offset_top = offset_top; + md->start_match_ptr = mstart; RRETURN(MATCH_MATCH); } @@ -1490,7 +1498,6 @@ for (;;) recursion_info *rec = md->recursive; DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); md->recursive = rec->prevrec; - mstart = rec->save_start; memcpy(md->offset_vector, rec->offset_save, rec->saved_max * sizeof(int)); offset_top = rec->save_offset_top; diff --git a/pcre_internal.h b/pcre_internal.h index 3cb8b46..67a3475 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1617,7 +1617,6 @@ typedef struct recursion_info { struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ int group_num; /* Number of group that was called */ const uschar *after_call; /* "Return value": points after the call in the expr */ - USPTR save_start; /* Old value of mstart */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */ int save_offset_top; /* Current value 
of offset_top */ diff --git a/testdata/testinput11 b/testdata/testinput11 index 501ac3c..d1054ef 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -357,4 +357,26 @@ /^(?(?!a(*SKIP)b))/ ac +/(?>a\Kb)/ + ab + +/((?>a\Kb))/ + ab + +/(a\Kb)/ + ab + +/^a\Kcz|ac/ + ac + +/(?>a\Kbz|ab)/ + ab + +/^(?&t)(?(DEFINE)(?<t>a\Kb))$/ + ab + +/^([^()]|\((?1)*\))*$/ + a(b)c + a(b(c)d)e + /-- End of testinput11 --/ diff --git a/testdata/testinput2 b/testdata/testinput2 index 5233183..dc20e82 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3208,4 +3208,26 @@ a random value. /Ix /^(?&t)*(?(DEFINE)(?<t>.))$/BZ +/ -- The first four of these are not in the Perl 5.10 test because Perl + documents that the use of \K in assertions is "not well defined". The + last is here because Perl gives the match as "b" rather than "ab". I + believe this to be a Perl bug. --/ + +/(?=a\Kb)ab/ + ab + +/(?!a\Kb)ac/ + ac + +/^abc(?<=b\Kc)d/ + abcd + +/^abc(?<!b\Kq)d/ + abcd + +/(?>a\Kb)z|(ab)/ + ab + +/----------------------/ + /-- End of testinput2 --/ diff --git a/testdata/testoutput11 b/testdata/testoutput11 index e5d3df7..4fb5efd 100644 --- a/testdata/testoutput11 +++ b/testdata/testoutput11 @@ -742,4 +742,38 @@ No match ac 0: +/(?>a\Kb)/ + ab + 0: b + +/((?>a\Kb))/ + ab + 0: b + 1: ab + +/(a\Kb)/ + ab + 0: b + 1: ab + +/^a\Kcz|ac/ + ac + 0: ac + +/(?>a\Kbz|ab)/ + ab + 0: ab + +/^(?&t)(?(DEFINE)(?<t>a\Kb))$/ + ab + 0: b + +/^([^()]|\((?1)*\))*$/ + a(b)c + 0: a(b)c + 1: c + a(b(c)d)e + 0: a(b(c)d)e + 1: e + /-- End of testinput11 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8271ca6..1b3c2f9 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -10637,4 +10637,32 @@ No match End ------------------------------------------------------------------ +/ -- The first four of these are not in the Perl 5.10 test because Perl + documents that the use of \K in assertions is "not well defined". 
The + last is here because Perl gives the match as "b" rather than "ab". I + believe this to be a Perl bug. --/ + +/(?=a\Kb)ab/ + ab + 0: b + +/(?!a\Kb)ac/ + ac + 0: ac + +/^abc(?<=b\Kc)d/ + abcd + 0: cd + +/^abc(?<!b\Kq)d/ + abcd + 0: abcd + +/(?>a\Kb)z|(ab)/ + ab + 0: ab + 1: ab + +/----------------------/ + /-- End of testinput2 --/ |