summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-03-03 11:14:26 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-03-03 11:14:26 +0000
commit 54ece496846cbf09aa3af412fcd7ebde618b7467 (patch)
tree 96ca0daa570e4b8c414835c43d3dc6476f6a87a6
parent d9fc8663fba279484ba2f0e30d8bcf024b364f00 (diff)
download pcre-54ece496846cbf09aa3af412fcd7ebde618b7467.tar.gz
Allow callout before assertion condition in a conditional group.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1266 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 4
-rw-r--r-- doc/pcrecallout.3 36
-rw-r--r-- doc/pcrepattern.3 13
-rw-r--r-- pcre_compile.c 21
-rw-r--r-- testdata/testinput2 6
-rw-r--r-- testdata/testoutput2 26
6 files changed, 87 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 9ab8fd8..d935b5f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -89,6 +89,10 @@ Version 8.33 xx-xxxx-201x
there was a conditional group that depended on an assertion, if the
assertion was false, the callout that immediately followed the alternation
in the condition was skipped when pcre_exec() was used for matching.
+
+23. Allow an explicit callout to be inserted before an assertion that is the
+ condition for a conditional group, for compatibility with automatic
+ callouts, which always insert a callout at this point.
Version 8.32 30-November-2012
diff --git a/doc/pcrecallout.3 b/doc/pcrecallout.3
index 946035d..19baf23 100644
--- a/doc/pcrecallout.3
+++ b/doc/pcrecallout.3
@@ -1,4 +1,4 @@
-.TH PCRECALLOUT 3 "13 January 2013" "PCRE 8.33"
+.TH PCRECALLOUT 3 "03 March 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -41,8 +41,17 @@ it is processed as if it were
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
.sp
Notice that there is a callout before and after each parenthesis and
-alternation bar. Automatic callouts can be used for tracking the progress of
-pattern matching. The
+alternation bar. If the pattern contains a conditional group whose condition is
+an assertion, an automatic callout is inserted immediately before the
+condition. Such a callout may also be inserted explicitly, for example:
+.sp
+ (?(?C9)(?=a)ab|de)
+.sp
+This applies only to assertion conditions (because they are themselves
+independent groups).
+.P
+Automatic callouts can be used for tracking the progress of pattern matching.
+The
.\" HREF
\fBpcretest\fP
.\"
@@ -115,10 +124,10 @@ automatically generated callouts).
.P
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
passed by the caller to the matching function. When \fBpcre_exec()\fP or
-\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to extract
-substrings that have been matched so far, in the same way as for extracting
-substrings after a match has completed. For the DFA matching functions, this
-field is not useful.
+\fBpcre[16|32]_exec()\fP is used, the contents can be inspected, in order to
+extract substrings that have been matched so far, in the same way as for
+extracting substrings after a match has completed. For the DFA matching
+functions, this field is not useful.
.P
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
that were passed to the matching function.
@@ -171,11 +180,12 @@ help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts.
.P
The \fImark\fP field is present from version 2 of the callout structure. In
-callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a pointer to
-the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
-(*THEN) item in the match, or NULL if no such items have been passed. Instances
-of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
-callouts from the DFA matching functions this field always contains NULL.
+callouts from \fBpcre_exec()\fP or \fBpcre[16|32]_exec()\fP it contains a
+pointer to the zero-terminated name of the most recently passed (*MARK),
+(*PRUNE), or (*THEN) item in the match, or NULL if no such items have been
+passed. Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
+previous (*MARK). In callouts from the DFA matching functions this field always
+contains NULL.
.
.
.SH "RETURN VALUES"
@@ -207,6 +217,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 13 January 2013
+Last updated: 03 March 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 9b3e471..73d256b 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "27 February 2013" "PCRE 8.33"
+.TH PCREPATTERN 3 "03 March 2013" "PCRE 8.33"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -2640,7 +2640,14 @@ For example, this pattern has two callout points:
.sp
If the PCRE_AUTO_CALLOUT flag is passed to a compiling function, callouts are
automatically installed before each item in the pattern. They are all numbered
-255.
+255. If there is a conditional group in the pattern whose condition is an
+assertion, an additional callout is inserted just before the condition. An
+explicit callout may also be set at this position, as in this example:
+.sp
+ (?(?C9)(?=a)abc|def)
+.sp
+Note that this applies only to assertion conditions, not to other types of
+condition.
.P
During matching, when PCRE reaches a callout point, the external function is
called. It is provided with the number of the callout, the position in the
@@ -2989,6 +2996,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 27 February 2013
+Last updated: 03 March 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/pcre_compile.c b/pcre_compile.c
index 4fd1678..4f17ba1 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5729,6 +5729,7 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
group), a name (referring to a named group), or 'R', referring to
@@ -5741,14 +5742,28 @@ for (;; ptr++)
be the recursive thing or the name 'R' (and similarly for 'R' followed
by digits), and (b) a number could be a name that consists of digits.
In both cases, we look for a name first; if not found, we try the other
- cases. */
+ cases.
+
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
+ used to check for an assertion condition. That's all that is needed! */
+
+ if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
+ {
+ for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
+ if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
- ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
+ (tempptr[2] == CHAR_EQUALS_SIGN ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
/* Most other conditions use OP_CREF (a couple change to OP_RREF
diff --git a/testdata/testinput2 b/testdata/testinput2
index f16444a..efb5741 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3845,4 +3845,10 @@ settings of the anchored and startline bits. --/
xxxx123a\P\P
xxxx123a\P
+/^(?(?=a)aa|bb)/C
+ bb
+
+/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
+ bb
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index cdb186e..aa38634 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12616,4 +12616,30 @@ Partial match: 123a
xxxx123a\P
Partial match: 123a
+/^(?(?=a)aa|bb)/C
+ bb
+--->bb
+ +0 ^ ^
+ +1 ^ (?(?=a)aa|bb)
+ +3 ^ (?=a)
+ +6 ^ a
++11 ^ b
++12 ^^ b
++13 ^ ^ )
++14 ^ ^
+ 0: bb
+
+/(?C1)^(?C2)(?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))(?C11)/
+ bb
+--->bb
+ 1 ^ ^
+ 2 ^ (?(?C99)(?=(?C3)a(?C4))(?C5)a(?C6)a(?C7)|(?C8)b(?C9)b(?C10))
+ 99 ^ (?=(?C3)a(?C4))
+ 3 ^ a
+ 8 ^ b
+ 9 ^^ b
+ 10 ^ ^ )
+ 11 ^ ^
+ 0: bb
+
/-- End of testinput2 --/