diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2015-08-01 09:30:02 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2015-08-01 09:30:02 +0000 |
commit | 1ee00f9319783443849b9d7246c49eef76964b78 (patch) | |
tree | 1ccc1532905844f8c5a8047e2301079a441226db | |
parent | 92048fd48054e301df2e2cefa5c77b73fb2e9fda (diff) | |
download | pcre-1ee00f9319783443849b9d7246c49eef76964b78.tar.gz |
Give up on finding the minimum matching length for overly complex patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1584 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | pcre_study.c | 19 | ||||
-rw-r--r-- | testdata/testinput2 | 2 | ||||
-rw-r--r-- | testdata/testoutput2 | 11 |
4 files changed, 29 insertions, 6 deletions
@@ -97,6 +97,9 @@ Version 8.38 xx-xxx-xxxx 25. If (?R was followed by - or + incorrect behaviour happened instead of a diagnostic. + +26. Arrange to give up on finding the minimum matching length for overly + complex patterns. Version 8.37 28-April-2015 diff --git a/pcre_study.c b/pcre_study.c index 998fe23..932e9a7 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -71,6 +71,7 @@ Arguments: startcode pointer to start of the whole pattern's code options the compiling options recurses chain of recurse_check to catch mutual recursion + countptr pointer to call count (to catch over complexity) Returns: the minimum length -1 if \C in UTF-8 mode or (*ACCEPT) was encountered @@ -80,7 +81,8 @@ Returns: the minimum length static int find_minlength(const REAL_PCRE *re, const pcre_uchar *code, - const pcre_uchar *startcode, int options, recurse_check *recurses) + const pcre_uchar *startcode, int options, recurse_check *recurses, + int *countptr) { int length = -1; /* PCRE_UTF16 has the same value as PCRE_UTF8. 
*/ @@ -90,6 +92,8 @@ recurse_check this_recurse; register int branchlength = 0; register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; +if ((*countptr)++ > 1000) return -1; /* too complex */ + if (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; @@ -131,7 +135,7 @@ for (;;) case OP_SBRAPOS: case OP_ONCE: case OP_ONCE_NC: - d = find_minlength(re, cc, startcode, options, recurses); + d = find_minlength(re, cc, startcode, options, recurses, countptr); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -415,7 +419,8 @@ for (;;) int dd; this_recurse.prev = recurses; this_recurse.group = cs; - dd = find_minlength(re, cs, startcode, options, &this_recurse); + dd = find_minlength(re, cs, startcode, options, &this_recurse, + countptr); if (dd < d) d = dd; } } @@ -451,7 +456,8 @@ for (;;) { this_recurse.prev = recurses; this_recurse.group = cs; - d = find_minlength(re, cs, startcode, options, &this_recurse); + d = find_minlength(re, cs, startcode, options, &this_recurse, + countptr); } } } @@ -514,7 +520,7 @@ for (;;) this_recurse.prev = recurses; this_recurse.group = cs; branchlength += find_minlength(re, cs, startcode, options, - &this_recurse); + &this_recurse, countptr); } } cc += 1 + LINK_SIZE; @@ -1453,6 +1459,7 @@ pcre32_study(const pcre32 *external_re, int options, const char **errorptr) #endif { int min; +int count = 0; BOOL bits_set = FALSE; pcre_uint8 start_bits[32]; PUBL(extra) *extra = NULL; @@ -1539,7 +1546,7 @@ if ((re->options & PCRE_ANCHORED) == 0 && /* Find the minimum length of subject string. 
*/ -switch(min = find_minlength(re, code, code, re->options, NULL)) +switch(min = find_minlength(re, code, code, re->options, NULL, &count)) { case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; case -3: *errorptr = "internal error: opcode not recognized"; return NULL; diff --git a/testdata/testinput2 b/testdata/testinput2 index 8f18fa4..df2c1cc 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4192,4 +4192,6 @@ backtracking verbs. --/ /(?R-:(?</ +/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 0ec4b36..d3fc254 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14526,4 +14526,15 @@ Failed: missing terminating ] for character class at offset 353 /(?R-:(?</ Failed: (?R or (?[+-]digits must be followed by ) at offset 3 +/(?1){3918}(((((0(\k'R'))))(?J)(?'R'(?'R'\3){99})))/I +Capturing subpattern count = 8 +Max back reference = 8 +Named capturing subpatterns: + R 7 + R 8 +No options +Duplicate name status changes +No first char +Need char = '0' + /-- End of testinput2 --/ |