summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-04-13 09:31:55 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2015-04-13 09:31:55 +0000
commite75637b2efc61520e5bb4a99643620a27d4b036a (patch)
treeb96a41d90b3ac9194b1c820845b1557d5ddac134
parent6719c2cdeb7670d4bf10f15a8511ca15af7ea595 (diff)
downloadpcre-e75637b2efc61520e5bb4a99643620a27d4b036a.tar.gz
Fix slow study when much mutual recursion.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1547 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog6
-rw-r--r--pcre_compile.c8
-rw-r--r--pcre_internal.h7
-rw-r--r--pcre_study.c68
-rw-r--r--testdata/testinput22
-rw-r--r--testdata/testoutput22
6 files changed, 69 insertions, 24 deletions
diff --git a/ChangeLog b/ChangeLog
index a99591f..816a390 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -158,6 +158,12 @@ Version 8.37 xx-xxx-2015
other cases where backtracking after \C could crash. This set of bugs was
discovered by the LLVM fuzzer.
+20. The function for finding the minimum length of a matching string could take
+ a very long time if mutual recursion was present many times in a pattern,
+ for example, /((?2){73}(?2))((?1))/. A better mutual recursion detection
+ method has been implemented. This infelicity was discovered by the LLVM
+ fuzzer.
+
Version 8.36 26-September-2014
------------------------------
diff --git a/pcre_compile.c b/pcre_compile.c
index 6510835..5d819f0 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -866,14 +866,6 @@ static const pcre_uint8 opcode_possessify[] = {
};
-/* Structure for mutual recursion detection. */
-
-typedef struct recurse_check {
- struct recurse_check *prev;
- const pcre_uchar *group;
-} recurse_check;
-
-
/*************************************************
* Find an error text *
diff --git a/pcre_internal.h b/pcre_internal.h
index 1c5f4ce..871520e 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -2460,6 +2460,13 @@ typedef struct branch_chain {
pcre_uchar *current_branch;
} branch_chain;
+/* Structure for mutual recursion detection. */
+
+typedef struct recurse_check {
+ struct recurse_check *prev;
+ const pcre_uchar *group;
+} recurse_check;
+
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_exec(). */
diff --git a/pcre_study.c b/pcre_study.c
index a2458c4..a4c7428 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -70,7 +70,7 @@ Arguments:
code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern's code
options the compiling options
- int RECURSE depth
+ recurses chain of recurse_check to catch mutual recursion
Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@@ -80,12 +80,13 @@ Returns: the minimum length
static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
- const pcre_uchar *startcode, int options, int recurse_depth)
+ const pcre_uchar *startcode, int options, recurse_check *recurses)
{
int length = -1;
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
BOOL utf = (options & PCRE_UTF8) != 0;
BOOL had_recurse = FALSE;
+recurse_check this_recurse;
register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
@@ -130,7 +131,7 @@ for (;;)
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
- d = find_minlength(re, cc, startcode, options, recurse_depth);
+ d = find_minlength(re, cc, startcode, options, recurses);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -393,17 +394,31 @@ for (;;)
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
- if ((cc > cs && cc < ce) || recurse_depth > 10)
+ if (cc > cs && cc < ce) /* Simple recursion */
{
d = 0;
had_recurse = TRUE;
break;
}
else
- {
- int dd = find_minlength(re, cs, startcode, options, recurse_depth+1);
- if (dd < d) d = dd;
- }
+ {
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+ if (r != NULL) /* Mutual recursion */
+ {
+ d = 0;
+ had_recurse = TRUE;
+ break;
+ }
+ else
+ {
+ int dd;
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ dd = find_minlength(re, cs, startcode, options, &this_recurse);
+ if (dd < d) d = dd;
+ }
+ }
slot += re->name_entry_size;
}
}
@@ -418,14 +433,26 @@ for (;;)
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
- if ((cc > cs && cc < ce) || recurse_depth > 10)
+ if (cc > cs && cc < ce) /* Simple recursion */
{
d = 0;
had_recurse = TRUE;
}
else
- {
- d = find_minlength(re, cs, startcode, options, recurse_depth + 1);
+ {
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+ if (r != NULL) /* Mutual recursion */
+ {
+ d = 0;
+ had_recurse = TRUE;
+ }
+ else
+ {
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ d = find_minlength(re, cs, startcode, options, &this_recurse);
+ }
}
}
else d = 0;
@@ -474,12 +501,21 @@ for (;;)
case OP_RECURSE:
cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
do ce += GET(ce, 1); while (*ce == OP_ALT);
- if ((cc > cs && cc < ce) || recurse_depth > 10)
+ if (cc > cs && cc < ce) /* Simple recursion */
had_recurse = TRUE;
else
- {
- branchlength += find_minlength(re, cs, startcode, options,
- recurse_depth + 1);
+ {
+ recurse_check *r = recurses;
+ for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+ if (r != NULL) /* Mutual recursion */
+ had_recurse = TRUE;
+ else
+ {
+ this_recurse.prev = recurses;
+ this_recurse.group = cs;
+ branchlength += find_minlength(re, cs, startcode, options,
+ &this_recurse);
+ }
}
cc += 1 + LINK_SIZE;
break;
@@ -1503,7 +1539,7 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
/* Find the minimum length of subject string. */
-switch(min = find_minlength(re, code, code, re->options, 0))
+switch(min = find_minlength(re, code, code, re->options, NULL))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
diff --git a/testdata/testinput2 b/testdata/testinput2
index b834407..58fe53b 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4150,4 +4150,6 @@ backtracking verbs. --/
/(?<=\Ka)/G+
aaaaa
+/((?2){73}(?2))((?1))/
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 359bcf9..b718df0 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14421,4 +14421,6 @@ Failed: lookbehind assertion is not fixed length at offset 17
0: a
0+
+/((?2){73}(?2))((?1))/
+
/-- End of testinput2 --/