summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-06 19:00:29 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-06 19:00:29 +0000
commit 2878ed98d792c02f7c9f7b4832016f55ad1db1ee (patch)
tree 4e353f00244bc898213a46fc21c8a066dbf1ff50
parent 83b2b44d38f1afd36c4b19e1afceea68e6216fbd (diff)
download pcre-2878ed98d792c02f7c9f7b4832016f55ad1db1ee.tar.gz
Fix bugs with \K in atomic groups, subroutines, and assertions.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@500 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 7
-rw-r--r-- doc/pcrepattern.3 6
-rw-r--r-- pcre_compile.c 1
-rw-r--r-- pcre_exec.c 27
-rw-r--r-- pcre_internal.h 1
-rw-r--r-- testdata/testinput11 22
-rw-r--r-- testdata/testinput2 22
-rw-r--r-- testdata/testoutput11 34
-rw-r--r-- testdata/testoutput2 28
9 files changed, 136 insertions, 12 deletions
diff --git a/ChangeLog b/ChangeLog
index 33d6ea0..f14172c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -38,6 +38,13 @@ Version 8.02 01-Mar-2010
counting zeros. There was no check for running off the end of the string,
which could happen if a new error number was added without updating the
string.
+
+10. \K gave a compile-time error if it appeared in a lookbehind assertion.
+
+11. \K was not working if it appeared in an atomic group or in a group that
+ was called as a "subroutine", or in an assertion. Perl 5.11 documents that
+ \K is "not well defined" if used in an assertion. PCRE now accepts it if
+ the assertion is positive, but not if it is negative.
Version 8.01 19-Jan-2010
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index c5f20eb..8849872 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -737,6 +737,10 @@ For example, when the pattern
(foo)\eKbar
.sp
matches "foobar", the first substring is still set to "foo".
+.P
+Perl documents that the use of \eK within assertions is "not well defined". In
+PCRE, \eK is acted upon when it occurs inside positive assertions, but is
+ignored in negative assertions.
.
.
.\" HTML <a name="smallassertions"></a>
@@ -2453,6 +2457,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 01 March 2010
+Last updated: 06 March 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
diff --git a/pcre_compile.c b/pcre_compile.c
index e89819a..b9cc701 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1450,6 +1450,7 @@ for (;;)
case OP_CALLOUT:
case OP_SOD:
case OP_SOM:
+ case OP_SET_SOM:
case OP_EOD:
case OP_EODN:
case OP_CIRC:
diff --git a/pcre_exec.c b/pcre_exec.c
index c3bb970..2734723 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1070,7 +1070,6 @@ for (;;)
memmove(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
offset_top = rec->save_offset_top;
- mstart = rec->save_start;
ims = original_ims;
ecode = rec->after_call;
break;
@@ -1114,7 +1113,11 @@ for (;;)
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
RM4);
- if (rrc == MATCH_MATCH) break;
+ if (rrc == MATCH_MATCH)
+ {
+ mstart = md->start_match_ptr; /* In case \K reset it */
+ break;
+ }
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode, 1);
}
@@ -1267,9 +1270,7 @@ for (;;)
memcpy(new_recursive.offset_save, md->offset_vector,
new_recursive.saved_max * sizeof(int));
- new_recursive.save_start = mstart;
new_recursive.save_offset_top = offset_top;
- mstart = eptr;
/* OK, now we can do the recursion. For each top-level alternative we
restore the offset and recursion data. */
@@ -1316,7 +1317,8 @@ for (;;)
a move back into the brackets. Friedl calls these "atomic" subpatterns.
Check the alternative branches in turn - the matching won't pass the KET
for this kind of subpattern. If any one branch matches, we carry on as at
- the end of a normal bracket, leaving the subject pointer. */
+ the end of a normal bracket, leaving the subject pointer, but resetting
+ the start-of-match value in case it was changed by \K. */
case OP_ONCE:
prev = ecode;
@@ -1325,7 +1327,11 @@ for (;;)
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
- if (rrc == MATCH_MATCH) break;
+ if (rrc == MATCH_MATCH)
+ {
+ mstart = md->start_match_ptr;
+ break;
+ }
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode,1);
}
@@ -1444,9 +1450,10 @@ for (;;)
}
else saved_eptr = NULL;
- /* If we are at the end of an assertion group, stop matching and return
- MATCH_MATCH, but record the current high water mark for use by positive
- assertions. Do this also for the "once" (atomic) groups. */
+ /* If we are at the end of an assertion group or an atomic group, stop
+ matching and return MATCH_MATCH, but record the current high water mark for
+ use by positive assertions. We also need to record the match start in case
+ it was changed by \K. */
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
@@ -1454,6 +1461,7 @@ for (;;)
{
md->end_match_ptr = eptr; /* For ONCE */
md->end_offset_top = offset_top;
+ md->start_match_ptr = mstart;
RRETURN(MATCH_MATCH);
}
@@ -1490,7 +1498,6 @@ for (;;)
recursion_info *rec = md->recursive;
DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
md->recursive = rec->prevrec;
- mstart = rec->save_start;
memcpy(md->offset_vector, rec->offset_save,
rec->saved_max * sizeof(int));
offset_top = rec->save_offset_top;
diff --git a/pcre_internal.h b/pcre_internal.h
index 3cb8b46..67a3475 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1617,7 +1617,6 @@ typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
int group_num; /* Number of group that was called */
const uschar *after_call; /* "Return value": points after the call in the expr */
- USPTR save_start; /* Old value of mstart */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
int save_offset_top; /* Current value of offset_top */
diff --git a/testdata/testinput11 b/testdata/testinput11
index 501ac3c..d1054ef 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -357,4 +357,26 @@
/^(?(?!a(*SKIP)b))/
ac
+/(?>a\Kb)/
+ ab
+
+/((?>a\Kb))/
+ ab
+
+/(a\Kb)/
+ ab
+
+/^a\Kcz|ac/
+ ac
+
+/(?>a\Kbz|ab)/
+ ab
+
+/^(?&t)(?(DEFINE)(?<t>a\Kb))$/
+ ab
+
+/^([^()]|\((?1)*\))*$/
+ a(b)c
+ a(b(c)d)e
+
/-- End of testinput11 --/
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5233183..dc20e82 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3208,4 +3208,26 @@ a random value. /Ix
/^(?&t)*(?(DEFINE)(?<t>.))$/BZ
+/ -- The first four of these are not in the Perl 5.10 test because Perl
+ documents that the use of \K in assertions is "not well defined". The
+ last is here because Perl gives the match as "b" rather than "ab". I
+ believe this to be a Perl bug. --/
+
+/(?=a\Kb)ab/
+ ab
+
+/(?!a\Kb)ac/
+ ac
+
+/^abc(?<=b\Kc)d/
+ abcd
+
+/^abc(?<!b\Kq)d/
+ abcd
+
+/(?>a\Kb)z|(ab)/
+ ab
+
+/----------------------/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput11 b/testdata/testoutput11
index e5d3df7..4fb5efd 100644
--- a/testdata/testoutput11
+++ b/testdata/testoutput11
@@ -742,4 +742,38 @@ No match
ac
0:
+/(?>a\Kb)/
+ ab
+ 0: b
+
+/((?>a\Kb))/
+ ab
+ 0: b
+ 1: ab
+
+/(a\Kb)/
+ ab
+ 0: b
+ 1: ab
+
+/^a\Kcz|ac/
+ ac
+ 0: ac
+
+/(?>a\Kbz|ab)/
+ ab
+ 0: ab
+
+/^(?&t)(?(DEFINE)(?<t>a\Kb))$/
+ ab
+ 0: b
+
+/^([^()]|\((?1)*\))*$/
+ a(b)c
+ 0: a(b)c
+ 1: c
+ a(b(c)d)e
+ 0: a(b(c)d)e
+ 1: e
+
/-- End of testinput11 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 8271ca6..1b3c2f9 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -10637,4 +10637,32 @@ No match
End
------------------------------------------------------------------
+/ -- The first four of these are not in the Perl 5.10 test because Perl
+ documents that the use of \K in assertions is "not well defined". The
+ last is here because Perl gives the match as "b" rather than "ab". I
+ believe this to be a Perl bug. --/
+
+/(?=a\Kb)ab/
+ ab
+ 0: b
+
+/(?!a\Kb)ac/
+ ac
+ 0: ac
+
+/^abc(?<=b\Kc)d/
+ abcd
+ 0: cd
+
+/^abc(?<!b\Kq)d/
+ abcd
+ 0: abcd
+
+/(?>a\Kb)z|(ab)/
+ ab
+ 0: ab
+ 1: ab
+
+/----------------------/
+
/-- End of testinput2 --/