diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-18 10:36:45 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-18 10:36:45 +0000 |
commit | 0c3ee8182f352dc0aeecb56b3e2ab353891da04d (patch) | |
tree | 039533efcf3cf35b49047a6163d32249dc4e66e4 | |
parent | 039e2cbed923703d44e46a48ff13851bed11a6d8 (diff) | |
download | pcre-0c3ee8182f352dc0aeecb56b3e2ab353891da04d.tar.gz |
Fix bad compiling of possessively repeated conditional subpattern.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@749 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | pcre_compile.c | 94 | ||||
-rw-r--r-- | testdata/testinput1 | 6 | ||||
-rw-r--r-- | testdata/testinput2 | 4 | ||||
-rw-r--r-- | testdata/testoutput1 | 8 | ||||
-rw-r--r-- | testdata/testoutput2 | 37 |
6 files changed, 127 insertions, 25 deletions
@@ -33,6 +33,9 @@ Version 8.21 rejected were: (*ACCEPT), (*COMMIT), (*FAIL), (*MARK), (*PRUNE), (*SKIP), (*THEN), \h, \H, \v, \V, and single character negative classes with fixed repetitions, e.g. [^a]{3}, with and without PCRE_CASELESS. + +8. A possessively repeated conditional subpattern such as (?(?=c)c|d)++ was + being incorrectly compiled and would have given unpredictable results. Version 8.20 21-Oct-2011 diff --git a/pcre_compile.c b/pcre_compile.c index 2a49b3b..c66cff1 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4424,7 +4424,7 @@ for (;; ptr++) past, but it no longer happens for non-repeated recursions. In fact, the repeated ones could be re-implemented independently so as not to need this, but for the moment we rely on the code for repeating groups. */ - + if (*previous == OP_RECURSE) { memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); @@ -4982,43 +4982,45 @@ for (;; ptr++) ONCE brackets can be converted into non-capturing brackets, as the behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to deal with possessive ONCEs specially. - - Otherwise, if the quantifier was possessive, we convert the BRA code to - the POS form, and the KET code to KETRPOS. (It turns out to be convenient - at runtime to detect this kind of subpattern at both the start and at the - end.) The use of special opcodes makes it possible to reduce greatly the - stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO, - convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that - the default action below, of wrapping everything inside atomic brackets, - does not happen. - - Then, when we are doing the actual compile phase, check to see whether - this group is one that could match an empty string. If so, convert the - initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime - checking can be done. [This check is also applied to ONCE groups at - runtime, but in a different way.] 
*/ + + Otherwise, when we are doing the actual compile phase, check to see + whether this group is one that could match an empty string. If so, + convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so + that runtime checking can be done. [This check is also applied to ONCE + groups at runtime, but in a different way.] + + Then, if the quantifier was possessive and the bracket is not a + conditional, we convert the BRA code to the POS form, and the KET code to + KETRPOS. (It turns out to be convenient at runtime to detect this kind of + subpattern at both the start and at the end.) The use of special opcodes + makes it possible to reduce greatly the stack usage in pcre_exec(). If + the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. Then + cancel the possessive flag so that the default action below, of wrapping + everything inside atomic brackets, does not happen. */ else { uschar *ketcode = code - 1 - LINK_SIZE; uschar *bracode = ketcode - GET(ketcode, 1); + /* Convert possessive ONCE brackets to non-capturing */ + if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && possessive_quantifier) *bracode = OP_BRA; + /* For non-possessive ONCE brackets, all we need to do is to + set the KET. */ + if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) *ketcode = OP_KETRMAX + repeat_type; + + /* Handle non-ONCE brackets and possessive ONCEs (which have been + converted to non-capturing above). */ + else { - if (possessive_quantifier) - { - *bracode += 1; /* Switch to xxxPOS opcodes */ - *ketcode = OP_KETRPOS; - if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; - possessive_quantifier = FALSE; - } - else *ketcode = OP_KETRMAX + repeat_type; - + /* In the compile phase, check for empty string matching. */ + if (lengthptr == NULL) { uschar *scode = bracode; @@ -5033,6 +5035,48 @@ for (;; ptr++) } while (*scode == OP_ALT); } + + /* Handle possessive quantifiers. 
*/ + + if (possessive_quantifier) + { + /* For COND brackets, we wrap the whole thing in a possessively + repeated non-capturing bracket, because we have not invented POS + versions of the COND opcodes. Because we are moving code along, we + must ensure that any pending recursive references are updated. */ + + if (*bracode == OP_COND || *bracode == OP_SCOND) + { + int nlen = (int)(code - bracode); + *code = OP_END; + adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); + memmove(bracode + 1+LINK_SIZE, bracode, nlen); + code += 1 + LINK_SIZE; + nlen += 1 + LINK_SIZE; + *bracode = OP_BRAPOS; + *code++ = OP_KETRPOS; + PUTINC(code, 0, nlen); + PUT(bracode, 1, nlen); + } + + /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ + + else + { + *bracode += 1; /* Switch to xxxPOS opcodes */ + *ketcode = OP_KETRPOS; + } + + /* If the minimum is zero, mark it as possessive, then unset the + possessive flag. */ + + if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; + possessive_quantifier = FALSE; + } + + /* Non-possessive quantifier */ + + else *ketcode = OP_KETRMAX + repeat_type; } } } diff --git a/testdata/testinput1 b/testdata/testinput1 index b24f900..5c65ed6 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -4285,4 +4285,10 @@ /(?<=a\v)c/ a\nc +/(?(?=c)c|d)++Y/ + XcccddYX + +/(?(?=c)c|d)*+Y/ + XcccddYX + /-- End of testinput1 --/ diff --git a/testdata/testinput2 b/testdata/testinput2 index ae822de..61bbeba 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4003,4 +4003,8 @@ AbcdCBefgBhiBqz *** Failers aAz +/(?(?=c)c|d)++Y/BZ + +/(?(?=c)c|d)*+Y/BZ + /-- End of testinput2 --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 0c2e84e..54c0bf2 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -7004,4 +7004,12 @@ No match a\nc 0: c +/(?(?=c)c|d)++Y/ + XcccddYX + 0: cccddY + +/(?(?=c)c|d)*+Y/ + XcccddYX + 0: cccddY + /-- End of testinput1 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 
index 17e28bb..58874a7 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -12554,4 +12554,41 @@ No match aAz No match +/(?(?=c)c|d)++Y/BZ +------------------------------------------------------------------ + Bra + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End +------------------------------------------------------------------ + +/(?(?=c)c|d)*+Y/BZ +------------------------------------------------------------------ + Bra + Braposzero + BraPos + Cond + Assert + c + Ket + c + Alt + d + Ket + KetRpos + Y + Ket + End +------------------------------------------------------------------ + /-- End of testinput2 --/ |