summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-01-15 18:45:27 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-01-15 18:45:27 +0000
commit 5e65b4b723e9252019890e3e6118e5b4fab5169d (patch)
tree f0c10176e136bfb95d7777a0c690c2b8d62d7f94
parent 91aad13b2f1763201de03b6667a9fb869103fa39 (diff)
download pcre-5e65b4b723e9252019890e3e6118e5b4fab5169d.tar.gz
Fix nested *MARK bug (nothing shown for /(?=(*:x))((*:y)q|)/ etc.)
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@882 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 6
-rw-r--r-- pcre_exec.c 9
-rw-r--r-- testdata/testinput2 14
-rw-r--r-- testdata/testoutput2 28
4 files changed, 56 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 9911047..c36055e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -38,6 +38,12 @@ Version 8.30
10. Get rid of a number of -Wunused-but-set-variable warnings.
+11. The pattern /(?=(*:x))(q|)/ matches an empty string, and returns the mark
+ "x". The similar pattern /(?=(*:x))((*:y)q|)/ did not return a mark at all.
+ Oddly, Perl behaves the same way. PCRE has been fixed so that this pattern
+ also returns the mark "x". This bug applied to capturing parentheses,
+ non-capturing parentheses, and atomic parentheses.
+
Version 8.21 12-Dec-2011
------------------------
diff --git a/pcre_exec.c b/pcre_exec.c
index ecb7cdc..d5363fb 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -626,6 +626,7 @@ the alternative names that are used. */
#define condassert condition
#define matched_once prev_is_word
#define foc number
+#define save_mark data
/* These statements are here to stop the compiler complaining about uninitialized
variables. */
@@ -818,6 +819,7 @@ for (;;)
case OP_ONCE_NC:
prev = ecode;
saved_eptr = eptr;
+ save_mark = md->mark;
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
@@ -836,6 +838,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode,1);
+ md->mark = save_mark;
}
while (*ecode == OP_ALT);
@@ -915,6 +918,7 @@ for (;;)
save_offset2 = md->offset_vector[offset+1];
save_offset3 = md->offset_vector[md->offset_end - number];
save_capture_last = md->capture_last;
+ save_mark = md->mark;
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
md->offset_vector[md->offset_end - number] =
@@ -951,6 +955,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->capture_last = save_capture_last;
ecode += GET(ecode, 1);
+ md->mark = save_mark;
if (*ecode != OP_ALT) break;
}
@@ -1016,9 +1021,10 @@ for (;;)
/* In all other cases, we have to make another call to match(). */
+ save_mark = md->mark;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
RM2);
-
+
/* See comment in the code for capturing groups above about handling
THEN. */
@@ -1045,6 +1051,7 @@ for (;;)
RRETURN(rrc);
}
ecode += GET(ecode, 1);
+ md->mark = save_mark;
if (*ecode != OP_ALT) break;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5340ed0..b8483e2 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3580,5 +3580,19 @@ replaced by single letters. --/
/^a(*:X)bcde/K
abc\P
+
+/-- These are here because Perl doesn't return a mark, except for the first --/
+
+/(?=(*:x))(q|)/K+
+ abc
+
+/(?=(*:x))((*:y)q|)/K+
+ abc
+
+/(?=(*:x))(?:(*:y)q|)/K+
+ abc
+
+/(?=(*:x))(?>(*:y)q|)/K+
+ abc
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 739ebed..841b55f 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -11970,5 +11970,33 @@ No match, mark = m
/^a(*:X)bcde/K
abc\P
Partial match, mark=X: abc
+
+/-- These are here because Perl doesn't return a mark, except for the first --/
+
+/(?=(*:x))(q|)/K+
+ abc
+ 0:
+ 0+ abc
+ 1:
+MK: x
+
+/(?=(*:x))((*:y)q|)/K+
+ abc
+ 0:
+ 0+ abc
+ 1:
+MK: x
+
+/(?=(*:x))(?:(*:y)q|)/K+
+ abc
+ 0:
+ 0+ abc
+MK: x
+
+/(?=(*:x))(?>(*:y)q|)/K+
+ abc
+ 0:
+ 0+ abc
+MK: x
/-- End of testinput2 --/