summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-18 10:36:45 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-18 10:36:45 +0000
commit 0c3ee8182f352dc0aeecb56b3e2ab353891da04d (patch)
tree 039533efcf3cf35b49047a6163d32249dc4e66e4
parent 039e2cbed923703d44e46a48ff13851bed11a6d8 (diff)
download pcre-0c3ee8182f352dc0aeecb56b3e2ab353891da04d.tar.gz
Fix bad compiling of possessively repeated conditional subpattern.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@749 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 3
-rw-r--r-- pcre_compile.c 94
-rw-r--r-- testdata/testinput1 6
-rw-r--r-- testdata/testinput2 4
-rw-r--r-- testdata/testoutput1 8
-rw-r--r-- testdata/testoutput2 37
6 files changed, 127 insertions, 25 deletions
diff --git a/ChangeLog b/ChangeLog
index 6c40540..7d3f451 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -33,6 +33,9 @@ Version 8.21
rejected were: (*ACCEPT), (*COMMIT), (*FAIL), (*MARK), (*PRUNE), (*SKIP),
(*THEN), \h, \H, \v, \V, and single character negative classes with fixed
repetitions, e.g. [^a]{3}, with and without PCRE_CASELESS.
+
+8. A possessively repeated conditional subpattern such as (?(?=c)c|d)++ was
+ being incorrectly compiled and would have given unpredictable results.
Version 8.20 21-Oct-2011
diff --git a/pcre_compile.c b/pcre_compile.c
index 2a49b3b..c66cff1 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4424,7 +4424,7 @@ for (;; ptr++)
past, but it no longer happens for non-repeated recursions. In fact, the
repeated ones could be re-implemented independently so as not to need this,
but for the moment we rely on the code for repeating groups. */
-
+
if (*previous == OP_RECURSE)
{
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
@@ -4982,43 +4982,45 @@ for (;; ptr++)
ONCE brackets can be converted into non-capturing brackets, as the
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
deal with possessive ONCEs specially.
-
- Otherwise, if the quantifier was possessive, we convert the BRA code to
- the POS form, and the KET code to KETRPOS. (It turns out to be convenient
- at runtime to detect this kind of subpattern at both the start and at the
- end.) The use of special opcodes makes it possible to reduce greatly the
- stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO,
- convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that
- the default action below, of wrapping everything inside atomic brackets,
- does not happen.
-
- Then, when we are doing the actual compile phase, check to see whether
- this group is one that could match an empty string. If so, convert the
- initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime
- checking can be done. [This check is also applied to ONCE groups at
- runtime, but in a different way.] */
+
+ Otherwise, when we are doing the actual compile phase, check to see
+ whether this group is one that could match an empty string. If so,
+ convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
+ that runtime checking can be done. [This check is also applied to ONCE
+ groups at runtime, but in a different way.]
+
+ Then, if the quantifier was possessive and the bracket is not a
+ conditional, we convert the BRA code to the POS form, and the KET code to
+ KETRPOS. (It turns out to be convenient at runtime to detect this kind of
+ subpattern at both the start and at the end.) The use of special opcodes
+ makes it possible to reduce greatly the stack usage in pcre_exec(). If
+ the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. Then
+ cancel the possessive flag so that the default action below, of wrapping
+ everything inside atomic brackets, does not happen. */
else
{
uschar *ketcode = code - 1 - LINK_SIZE;
uschar *bracode = ketcode - GET(ketcode, 1);
+ /* Convert possessive ONCE brackets to non-capturing */
+
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
possessive_quantifier) *bracode = OP_BRA;
+ /* For non-possessive ONCE brackets, all we need to do is to
+ set the KET. */
+
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
*ketcode = OP_KETRMAX + repeat_type;
+
+ /* Handle non-ONCE brackets and possessive ONCEs (which have been
+ converted to non-capturing above). */
+
else
{
- if (possessive_quantifier)
- {
- *bracode += 1; /* Switch to xxxPOS opcodes */
- *ketcode = OP_KETRPOS;
- if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
- possessive_quantifier = FALSE;
- }
- else *ketcode = OP_KETRMAX + repeat_type;
-
+ /* In the compile phase, check for empty string matching. */
+
if (lengthptr == NULL)
{
uschar *scode = bracode;
@@ -5033,6 +5035,48 @@ for (;; ptr++)
}
while (*scode == OP_ALT);
}
+
+ /* Handle possessive quantifiers. */
+
+ if (possessive_quantifier)
+ {
+ /* For COND brackets, we wrap the whole thing in a possessively
+ repeated non-capturing bracket, because we have not invented POS
+ versions of the COND opcodes. Because we are moving code along, we
+ must ensure that any pending recursive references are updated. */
+
+ if (*bracode == OP_COND || *bracode == OP_SCOND)
+ {
+ int nlen = (int)(code - bracode);
+ *code = OP_END;
+ adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
+ memmove(bracode + 1+LINK_SIZE, bracode, nlen);
+ code += 1 + LINK_SIZE;
+ nlen += 1 + LINK_SIZE;
+ *bracode = OP_BRAPOS;
+ *code++ = OP_KETRPOS;
+ PUTINC(code, 0, nlen);
+ PUT(bracode, 1, nlen);
+ }
+
+ /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
+
+ else
+ {
+ *bracode += 1; /* Switch to xxxPOS opcodes */
+ *ketcode = OP_KETRPOS;
+ }
+
+ /* If the minimum is zero, mark it as possessive, then unset the
+ possessive flag. */
+
+ if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
+ possessive_quantifier = FALSE;
+ }
+
+ /* Non-possessive quantifier */
+
+ else *ketcode = OP_KETRMAX + repeat_type;
}
}
}
diff --git a/testdata/testinput1 b/testdata/testinput1
index b24f900..5c65ed6 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -4285,4 +4285,10 @@
/(?<=a\v)c/
a\nc
+/(?(?=c)c|d)++Y/
+ XcccddYX
+
+/(?(?=c)c|d)*+Y/
+ XcccddYX
+
/-- End of testinput1 --/
diff --git a/testdata/testinput2 b/testdata/testinput2
index ae822de..61bbeba 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4003,4 +4003,8 @@ AbcdCBefgBhiBqz
*** Failers
aAz
+/(?(?=c)c|d)++Y/BZ
+
+/(?(?=c)c|d)*+Y/BZ
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 0c2e84e..54c0bf2 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -7004,4 +7004,12 @@ No match
a\nc
0: c
+/(?(?=c)c|d)++Y/
+ XcccddYX
+ 0: cccddY
+
+/(?(?=c)c|d)*+Y/
+ XcccddYX
+ 0: cccddY
+
/-- End of testinput1 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 17e28bb..58874a7 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -12554,4 +12554,41 @@ No match
aAz
No match
+/(?(?=c)c|d)++Y/BZ
+------------------------------------------------------------------
+ Bra
+ BraPos
+ Cond
+ Assert
+ c
+ Ket
+ c
+ Alt
+ d
+ Ket
+ KetRpos
+ Y
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?(?=c)c|d)*+Y/BZ
+------------------------------------------------------------------
+ Bra
+ Braposzero
+ BraPos
+ Cond
+ Assert
+ c
+ Ket
+ c
+ Alt
+ d
+ Ket
+ KetRpos
+ Y
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput2 --/