From e75637b2efc61520e5bb4a99643620a27d4b036a Mon Sep 17 00:00:00 2001 From: ph10 Date: Mon, 13 Apr 2015 09:31:55 +0000 Subject: Fix slow study when much mutual recursion. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1547 2f5784b3-3f2a-0410-8824-cb99058d5e15 --- ChangeLog | 6 +++++ pcre_compile.c | 8 ------- pcre_internal.h | 7 ++++++ pcre_study.c | 68 +++++++++++++++++++++++++++++++++++++++------------- testdata/testinput2 | 2 ++ testdata/testoutput2 | 2 ++ 6 files changed, 69 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index a99591f..816a390 100644 --- a/ChangeLog +++ b/ChangeLog @@ -158,6 +158,12 @@ Version 8.37 xx-xxx-2015 other cases where backtracking after \C could crash. This set of bugs was discovered by the LLVM fuzzer. +20. The function for finding the minimum length of a matching string could take + a very long time if mutual recursion was present many times in a pattern, + for example, /((?2){73}(?2))((?1))/. A better mutual recursion detection + method has been implemented. This infelicity was discovered by the LLVM + fuzzer. + Version 8.36 26-September-2014 ------------------------------ diff --git a/pcre_compile.c b/pcre_compile.c index 6510835..5d819f0 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -866,14 +866,6 @@ static const pcre_uint8 opcode_possessify[] = { }; -/* Structure for mutual recursion detection. */ - -typedef struct recurse_check { - struct recurse_check *prev; - const pcre_uchar *group; -} recurse_check; - - /************************************************* * Find an error text * diff --git a/pcre_internal.h b/pcre_internal.h index 1c5f4ce..871520e 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -2460,6 +2460,13 @@ typedef struct branch_chain { pcre_uchar *current_branch; } branch_chain; +/* Structure for mutual recursion detection. */ + +typedef struct recurse_check { + struct recurse_check *prev; + const pcre_uchar *group; +} recurse_check; + /* Structure for items in a linked list that represents an explicit recursive call within the pattern; used by pcre_exec(). */ diff --git a/pcre_study.c b/pcre_study.c index a2458c4..a4c7428 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -70,7 +70,7 @@ Arguments: code pointer to start of group (the bracket) startcode pointer to start of the whole pattern's code options the compiling options - int RECURSE depth + recurses chain of recurse_check to catch mutual recursion Returns: the minimum length -1 if \C in UTF-8 mode or (*ACCEPT) was encountered @@ -80,12 +80,13 @@ Returns: the minimum length static int find_minlength(const REAL_PCRE *re, const pcre_uchar *code, - const pcre_uchar *startcode, int options, int recurse_depth) + const pcre_uchar *startcode, int options, recurse_check *recurses) { int length = -1; /* PCRE_UTF16 has the same value as PCRE_UTF8. */ BOOL utf = (options & PCRE_UTF8) != 0; BOOL had_recurse = FALSE; +recurse_check this_recurse; register int branchlength = 0; register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; @@ -130,7 +131,7 @@ for (;;) case OP_SBRAPOS: case OP_ONCE: case OP_ONCE_NC: - d = find_minlength(re, cc, startcode, options, recurse_depth); + d = find_minlength(re, cc, startcode, options, recurses); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -393,17 +394,31 @@ for (;;) ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if ((cc > cs && cc < ce) || recurse_depth > 10) + if (cc > cs && cc < ce) /* Simple recursion */ { d = 0; had_recurse = TRUE; break; } else - { - int dd = find_minlength(re, cs, startcode, options, recurse_depth+1); - if (dd < d) d = dd; - } + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; + if (r != NULL) /* Mutual recursion */ + { + d = 0; + had_recurse = TRUE; + break; + } + else + { + int dd; + this_recurse.prev = recurses; + this_recurse.group = cs; + dd = find_minlength(re, cs, startcode, options, &this_recurse); + if (dd < d) d = dd; + } + } slot += re->name_entry_size; } } @@ -418,14 +433,26 @@ for (;;) ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if ((cc > cs && cc < ce) || recurse_depth > 10) + if (cc > cs && cc < ce) /* Simple recursion */ { d = 0; had_recurse = TRUE; } else - { - d = find_minlength(re, cs, startcode, options, recurse_depth + 1); + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; + if (r != NULL) /* Mutual recursion */ + { + d = 0; + had_recurse = TRUE; + } + else + { + this_recurse.prev = recurses; + this_recurse.group = cs; + d = find_minlength(re, cs, startcode, options, &this_recurse); + } } } else d = 0; @@ -474,12 +501,21 @@ for (;;) case OP_RECURSE: cs = ce = (pcre_uchar *)startcode + GET(cc, 1); do ce += GET(ce, 1); while (*ce == OP_ALT); - if ((cc > cs && cc < ce) || recurse_depth > 10) + if (cc > cs && cc < ce) /* Simple recursion */ had_recurse = TRUE; else - { - branchlength += find_minlength(re, cs, startcode, options, - recurse_depth + 1); + { + recurse_check *r = recurses; + for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; + if (r != NULL) /* Mutual recursion */ + had_recurse = TRUE; + else + { + this_recurse.prev = recurses; + this_recurse.group = cs; + branchlength += find_minlength(re, cs, startcode, options, + &this_recurse); + } } cc += 1 + LINK_SIZE; break; @@ -1503,7 +1539,7 @@ if ((re->options & PCRE_ANCHORED) == 0 && /* Find the minimum length of subject string. */ -switch(min = find_minlength(re, code, code, re->options, 0)) +switch(min = find_minlength(re, code, code, re->options, NULL)) { case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; case -3: *errorptr = "internal error: opcode not recognized"; return NULL; diff --git a/testdata/testinput2 b/testdata/testinput2 index b834407..58fe53b 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4150,4 +4150,6 @@ backtracking verbs. --/ /(?<=\Ka)/G+ aaaaa +/((?2){73}(?2))((?1))/ + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 359bcf9..b718df0 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14421,4 +14421,6 @@ Failed: lookbehind assertion is not fixed length at offset 17 0: a 0+ +/((?2){73}(?2))((?1))/ + /-- End of testinput2 --/ -- cgit v1.2.1