summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-07 17:35:52 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-03-07 17:35:52 +0000
commit 49c56a265833ec84377194fb95cf09b6592edf18 (patch)
tree 2129dee13e6c99c3dab14f47769ac1289fa33b01
parent 6413b089ecc41c087743ea779015d9a1fc4bc9c6 (diff)
download pcre-49c56a265833ec84377194fb95cf09b6592edf18.tar.gz
Fix incorrect compile time error for certain types of recursive patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@503 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 6
-rw-r--r-- pcre_compile.c 38
-rw-r--r-- testdata/testinput11 10
-rw-r--r-- testdata/testoutput11 27
4 files changed, 74 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 20ee4dc..0f19c4c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -51,6 +51,12 @@ Version 8.02 01-Mar-2010
implementation of support for (*MARK) will need an extra pointer on the
stack; I have reserved it now, so that the stack frame size does not
decrease.
+
+13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other
+ item in the branch that calls a recursion is a subroutine call - as in the
+ second branch in the above example - was incorrectly given the compile-
+ time error "recursive call could loop indefinitely" because pcre_compile()
+ was not correctly checking the subroutine for matching a non-empty string.
Version 8.01 19-Jan-2010
diff --git a/pcre_compile.c b/pcre_compile.c
index b9cc701..77b7c09 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1785,12 +1785,14 @@ Arguments:
code points to start of search
endcode points to where to stop
utf8 TRUE if in UTF8 mode
+ cd contains pointers to tables etc.
Returns: TRUE if what is matched could be empty
*/
static BOOL
-could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
+could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
+ compile_data *cd)
{
register int c;
for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
@@ -1800,7 +1802,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
const uschar *ccode;
c = *code;
-
+
/* Skip over forward assertions; the other assertions are skipped by
first_significant_code() with a TRUE final argument. */
@@ -1820,6 +1822,22 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
c = *code;
continue;
}
+
+ /* For a recursion/subroutine call, if the end of the called group has been
+ reached, which implies a subroutine call, we can scan that group. */
+
+ if (c == OP_RECURSE)
+ {
+ const uschar *scode = cd->start_code + GET(code, 1);
+ if (GET(scode, 1) == 0) return TRUE; /* Unclosed */
+ do
+ {
+ if (!could_be_empty_branch(scode, endcode, utf8, cd)) return FALSE;
+ scode += GET(scode, 1);
+ }
+ while (*scode == OP_ALT);
+ continue;
+ }
/* For other groups, scan the branches. */
@@ -1839,7 +1857,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
empty_branch = FALSE;
do
{
- if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
empty_branch = TRUE;
code += GET(code, 1);
}
@@ -1973,6 +1991,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
break;
#endif
+
+ /* None of the remaining opcodes are required to match a character. */
+
+ default:
+ break;
}
}
@@ -1995,17 +2018,18 @@ Arguments:
endcode points to where to stop (current RECURSE item)
bcptr points to the chain of current (unclosed) branch starts
utf8 TRUE if in UTF-8 mode
+ cd pointers to tables etc
Returns: TRUE if what is matched could be empty
*/
static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
- BOOL utf8)
+ BOOL utf8, compile_data *cd)
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
return FALSE;
bcptr = bcptr->outer;
}
@@ -4363,7 +4387,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
uschar *scode = bracode;
do
{
- if (could_be_empty_branch(scode, ketcode, utf8))
+ if (could_be_empty_branch(scode, ketcode, utf8, cd))
{
*bracode += OP_SBRA - OP_BRA;
break;
@@ -5176,7 +5200,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
recursion that could loop for ever, and diagnose that case. */
else if (GET(called, 1) == 0 &&
- could_be_empty(called, code, bcptr, utf8))
+ could_be_empty(called, code, bcptr, utf8, cd))
{
*errorcodeptr = ERR40;
goto FAILED;
diff --git a/testdata/testinput11 b/testdata/testinput11
index d1054ef..3543bf7 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -379,4 +379,14 @@
a(b)c
a(b(c)d)e
+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+ 0
+ 00
+ 0000
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+ 0
+ 00
+ 0000
+
/-- End of testinput11 --/
diff --git a/testdata/testoutput11 b/testdata/testoutput11
index 4fb5efd..313b7cb 100644
--- a/testdata/testoutput11
+++ b/testdata/testoutput11
@@ -776,4 +776,31 @@ No match
0: a(b(c)d)e
1: e
+/(?P<L1>(?P<L2>0)(?P>L1)|(?P>L2))/
+ 0
+ 0: 0
+ 1: 0
+ 00
+ 0: 00
+ 1: 00
+ 2: 0
+ 0000
+ 0: 0000
+ 1: 0000
+ 2: 0
+
+/(?P<L1>(?P<L2>0)|(?P>L2)(?P>L1))/
+ 0
+ 0: 0
+ 1: 0
+ 2: 0
+ 00
+ 0: 0
+ 1: 0
+ 2: 0
+ 0000
+ 0: 0
+ 1: 0
+ 2: 0
+
/-- End of testinput11 --/