diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-07 17:35:52 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-03-07 17:35:52 +0000 |
commit | 49c56a265833ec84377194fb95cf09b6592edf18 (patch) | |
tree | 2129dee13e6c99c3dab14f47769ac1289fa33b01 | |
parent | 6413b089ecc41c087743ea779015d9a1fc4bc9c6 (diff) | |
download | pcre-49c56a265833ec84377194fb95cf09b6592edf18.tar.gz |
Fix incorrect compile-time error for certain types of recursive patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@503 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | pcre_compile.c | 38 | ||||
-rw-r--r-- | testdata/testinput11 | 10 | ||||
-rw-r--r-- | testdata/testoutput11 | 27 |
4 files changed, 74 insertions, 7 deletions
@@ -51,6 +51,12 @@ Version 8.02 01-Mar-2010 implementation of support for (*MARK) will need an extra pointer on the stack; I have reserved it now, so that the stack frame size does not decrease. + +13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other + item in branch that calls a recursion is a subroutine call - as in the + second branch in the above example - was incorrectly given the compile- + time error "recursive call could loop indefinitely" because pcre_compile() + was not correctly checking the subroutine for matching a non-empty string. Version 8.01 19-Jan-2010 diff --git a/pcre_compile.c b/pcre_compile.c index b9cc701..77b7c09 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -1785,12 +1785,14 @@ Arguments: code points to start of search endcode points to where to stop utf8 TRUE if in UTF8 mode + cd contains pointers to tables etc. Returns: TRUE if what is matched could be empty */ static BOOL -could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8) +could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8, + compile_data *cd) { register int c; for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE); @@ -1800,7 +1802,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE const uschar *ccode; c = *code; - + /* Skip over forward assertions; the other assertions are skipped by first_significant_code() with a TRUE final argument. */ @@ -1820,6 +1822,22 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE c = *code; continue; } + + /* For a recursion/subroutine call, if its end has been reached, which + implies a subroutine call, we can scan it. 
*/ + + if (c == OP_RECURSE) + { + const uschar *scode = cd->start_code + GET(code, 1); + if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ + do + { + if (!could_be_empty_branch(scode, endcode, utf8, cd)) return FALSE; + scode += GET(scode, 1); + } + while (*scode == OP_ALT); + continue; + } /* For other groups, scan the branches. */ @@ -1839,7 +1857,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf8)) + if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd)) empty_branch = TRUE; code += GET(code, 1); } @@ -1973,6 +1991,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; break; #endif + + /* None of the remaining opcodes are required to match a character. */ + + default: + break; } } @@ -1995,17 +2018,18 @@ Arguments: endcode points to where to stop (current RECURSE item) bcptr points to the chain of current (unclosed) branch starts utf8 TRUE if in UTF-8 mode + cd pointers to tables etc Returns: TRUE if what is matched could be empty */ static BOOL could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, - BOOL utf8) + BOOL utf8, compile_data *cd) { while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd)) return FALSE; bcptr = bcptr->outer; } @@ -4363,7 +4387,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ uschar *scode = bracode; do { - if (could_be_empty_branch(scode, ketcode, utf8)) + if (could_be_empty_branch(scode, ketcode, utf8, cd)) { *bracode += OP_SBRA - OP_BRA; break; @@ -5176,7 +5200,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ recursion that could loop for ever, and diagnose that case. 
*/ else if (GET(called, 1) == 0 && - could_be_empty(called, code, bcptr, utf8)) + could_be_empty(called, code, bcptr, utf8, cd)) { *errorcodeptr = ERR40; goto FAILED; diff --git a/testdata/testinput11 b/testdata/testinput11 index d1054ef..3543bf7 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -379,4 +379,14 @@ a(b)c a(b(c)d)e +/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/ + 0 + 00 + 0000 + +/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/ + 0 + 00 + 0000 + /-- End of testinput11 --/ diff --git a/testdata/testoutput11 b/testdata/testoutput11 index 4fb5efd..313b7cb 100644 --- a/testdata/testoutput11 +++ b/testdata/testoutput11 @@ -776,4 +776,31 @@ No match 0: a(b(c)d)e 1: e +/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/ + 0 + 0: 0 + 1: 0 + 00 + 0: 00 + 1: 00 + 2: 0 + 0000 + 0: 0000 + 1: 0000 + 2: 0 + +/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/ + 0 + 0: 0 + 1: 0 + 2: 0 + 00 + 0: 0 + 1: 0 + 2: 0 + 0000 + 0: 0 + 1: 0 + 2: 0 + /-- End of testinput11 --/ |