From ca896b83b391bf6de7a7bf6e76da3bb02eb7001f Mon Sep 17 00:00:00 2001 From: ph10 Date: Mon, 1 Feb 2021 17:56:12 +0000 Subject: Fix some numerical checking bugs, Bugzilla 2690. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1295 6239d852-aaf2-0410-a92c-79f79f948069 --- ChangeLog | 9 +++++++++ src/pcre2_compile.c | 52 ++++++++++++++++++++++++++++++++---------------- testdata/testinput1 | 9 +++++++++ testdata/testinput11 | 3 +++ testdata/testinput2 | 2 -- testdata/testinput9 | 3 +++ testdata/testoutput1 | 12 +++++++++++ testdata/testoutput11-16 | 4 ++++ testdata/testoutput11-32 | 4 ++++ testdata/testoutput2 | 3 --- testdata/testoutput9 | 4 ++++ 11 files changed, 83 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3fd886a..2694119 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,6 +16,15 @@ but I have put in a check in order to get rid of the compiler error. 3. An alternative patch for CMakeLists.txt because 10.36 # 4 breaks CMake on Windows. Patch from email@cs-ware.de fixes bugzilla #2688. +4. Two bugs related to over-large numbers have been fixed so the behaviour is +now the same as Perl. + + (a) A pattern such as /\214748364/ gave an overflow error instead of being + treated as the octal number \214 followed by literal digits. + + (b) A sequence such as {65536 that has no terminating } so is not a + quantifier was nevertheless complaining that a quantifier number was too big. + Version 10.36 04-December-2020 ------------------------------ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index e811f12..4bd5be0 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1398,32 +1398,47 @@ static BOOL read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, uint32_t *maxp, int *errorcodeptr) { -PCRE2_SPTR p = *ptrptr; +PCRE2_SPTR p; BOOL yield = FALSE; +BOOL had_comma = FALSE; int32_t min = 0; int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ -/* NB read_number() initializes the error code to zero. The only error is for a -number that is too big. */ +/* Check the syntax */ +*errorcodeptr = 0; +for (p = *ptrptr;; p++) + { + uint32_t c; + if (p >= ptrend) return FALSE; + c = *p; + if (IS_DIGIT(c)) continue; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + if (c == CHAR_COMMA) + { + if (had_comma) return FALSE; + had_comma = TRUE; + } + else return FALSE; + } + +/* The only error from read_number() is for a number that is too big. */ + +p = *ptrptr; if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) goto EXIT; -if (p >= ptrend) goto EXIT; - if (*p == CHAR_RIGHT_CURLY_BRACKET) { p++; max = min; } - else { - if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT; - if (*p != CHAR_RIGHT_CURLY_BRACKET) + if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) { if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, - errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + errorcodeptr)) goto EXIT; if (max < min) { @@ -1438,11 +1453,10 @@ yield = TRUE; if (minp != NULL) *minp = (uint32_t)min; if (maxp != NULL) *maxp = (uint32_t)max; -/* Update the pattern pointer on success, or after an error, but not when -the result is "not a repeat quantifier". */ +/* Update the pattern pointer */ EXIT: -if (yield || *errorcodeptr != 0) *ptrptr = p; +*ptrptr = p; return yield; } @@ -1776,19 +1790,23 @@ else { oldptr = ptr; ptr--; /* Back to the digit */ - if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s, - errorcodeptr)) - break; - /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + /* As we know we are at a digit, the only possible error from + read_number() is a number that is too large to be a group number. In this + case we fall through handle this as not a group reference. If we have + read a small enough number, check for a back reference. + + \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. */ - if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount) + if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && + (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) { if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; else escape = -s; /* Indicates a back reference */ break; } + ptr = oldptr; /* Put the pointer back and fall through */ } diff --git a/testdata/testinput1 b/testdata/testinput1 index d4e42ba..bb4ddb7 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6420,4 +6420,13 @@ ef) x/x,mark /(?(DEFINE)(?bar))(?\x{8c}748364< + +/a{65536/ + >a{65536< + # End of testinput1 diff --git a/testdata/testinput11 b/testdata/testinput11 index 2d267d6..2bc8a25 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -368,4 +368,7 @@ abÿAz ab\x{80000041}z +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + # End of testinput11 diff --git a/testdata/testinput2 b/testdata/testinput2 index 3f9dd6d..87e3394 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2189,8 +2189,6 @@ /a(*MARK)b/ -/(?i:A{1,}\6666666666)/ - /\g6666666666/ /[\g6666666666]/B diff --git a/testdata/testinput9 b/testdata/testinput9 index 7be4b15..4eb228a 100644 --- a/testdata/testinput9 +++ b/testdata/testinput9 @@ -260,4 +260,7 @@ /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + # End of testinput9 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 16c91d2..05b310b 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -10176,4 +10176,16 @@ No match word 0: word +/a{1,2,3}b/ + a{1,2,3}b + 0: a{1,2,3}b + +/\214748364/ + >\x{8c}748364< + 0: \x8c748364 + +/a{65536/ + >a{65536< + 0: a{65536 + # End of testinput1 diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16 index 78bf7fb..8768785 100644 --- a/testdata/testoutput11-16 +++ b/testdata/testoutput11-16 @@ -661,4 +661,8 @@ Subject length lower bound = 1 abÿAz ab\x{80000041}z +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + 0: A\x{1b6}6666666 + # End of testinput11 diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32 index 4b00384..2c95f61 100644 --- a/testdata/testoutput11-32 +++ b/testdata/testoutput11-32 @@ -667,4 +667,8 @@ Subject length lower bound = 1 ab\x{80000041}z 0: ab\x{80000041}z +/(?i:A{1,}\6666666666)/ + A\x{1b6}6666666 + 0: A\x{1b6}6666666 + # End of testinput11 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index dc17011..4d8f65d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -8374,9 +8374,6 @@ No match /a(*MARK)b/ Failed: error 166 at offset 7: (*MARK) must have an argument -/(?i:A{1,}\6666666666)/ -Failed: error 161 at offset 19: subpattern number is too big - /\g6666666666/ Failed: error 161 at offset 7: subpattern number is too big diff --git a/testdata/testoutput9 b/testdata/testoutput9 index f98f276..1ec4317 100644 --- a/testdata/testoutput9 +++ b/testdata/testoutput9 @@ -367,4 +367,8 @@ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is to /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) +/(?i:A{1,}\6666666666)/ +Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode + A\x{1b6}6666666 + # End of testinput9 -- cgit v1.2.1