diff options
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r-- | ext/pcre/pcrelib/pcre_compile.c | 78 |
1 files changed, 65 insertions, 13 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c index eaf3d90ce8..54e23ea39e 100644 --- a/ext/pcre/pcrelib/pcre_compile.c +++ b/ext/pcre/pcrelib/pcre_compile.c @@ -90,6 +90,11 @@ is 4 there is plenty of room. */ #define COMPILE_WORK_SIZE (4096) +/* The overrun tests check for a slightly smaller size so that they detect the +overrun before it actually does run off the end of the data block. */ + +#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100) + /* Table for handling escaped characters in the range '0'-'z'. Positive returns are simple data values; negative values are for special things like \d and so @@ -261,7 +266,11 @@ the number of relocations needed when a shared library is loaded dynamically, it is now one long string. We cannot use a table of offsets, because the lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we simply count through to the one we want - this isn't a performance issue -because these strings are used only when there is a compilation error. */ +because these strings are used only when there is a compilation error. + +Each substring ends with \0 to insert a null character. This includes the final +substring, so that the whole string ends with \0\0, which can be detected when +counting through. */ static const char error_texts[] = "no error\0" @@ -342,8 +351,7 @@ static const char error_texts[] = "digit expected after (?+\0" "] is an invalid data character in JavaScript compatibility mode\0" /* 65 */ - "different names for subpatterns of the same number are not allowed"; - + "different names for subpatterns of the same number are not allowed\0"; /* Table to identify digits and hex digits. This is used when compiling patterns. Note that the tables in chartables are dependent on the locale, and @@ -501,7 +509,11 @@ static const char * find_error_text(int n) { const char *s = error_texts; -for (; n > 0; n--) while (*s++ != 0) {}; +for (; n > 0; n--) + { + while (*s++ != 0) {}; + if (*s == 0) return "Error text not found (please report)"; + } return s; } @@ -1441,6 +1453,7 @@ for (;;) case OP_CALLOUT: case OP_SOD: case OP_SOM: + case OP_SET_SOM: case OP_EOD: case OP_EODN: case OP_CIRC: @@ -1775,12 +1788,14 @@ Arguments: code points to start of search endcode points to where to stop utf8 TRUE if in UTF8 mode + cd contains pointers to tables etc. Returns: TRUE if what is matched could be empty */ static BOOL -could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8) +could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8, + compile_data *cd) { register int c; for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE); @@ -1811,6 +1826,28 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE continue; } + /* For a recursion/subroutine call, if its end has been reached, which + implies a subroutine call, we can scan it. */ + + if (c == OP_RECURSE) + { + BOOL empty_branch = FALSE; + const uschar *scode = cd->start_code + GET(code, 1); + if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ + do + { + if (could_be_empty_branch(scode, endcode, utf8, cd)) + { + empty_branch = TRUE; + break; + } + scode += GET(scode, 1); + } + while (*scode == OP_ALT); + if (!empty_branch) return FALSE; /* All branches are non-empty */ + continue; + } + /* For other groups, scan the branches. */ if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND) @@ -1829,7 +1866,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf8)) + if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd)) empty_branch = TRUE; code += GET(code, 1); } @@ -1963,6 +2000,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; break; #endif + + /* None of the remaining opcodes are required to match a character. */ + + default: + break; } } @@ -1985,17 +2027,18 @@ Arguments: endcode points to where to stop (current RECURSE item) bcptr points to the chain of current (unclosed) branch starts utf8 TRUE if in UTF-8 mode + cd pointers to tables etc Returns: TRUE if what is matched could be empty */ static BOOL could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, - BOOL utf8) + BOOL utf8, compile_data *cd) { while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd)) return FALSE; bcptr = bcptr->outer; } @@ -2720,7 +2763,7 @@ for (;; ptr++) #ifdef PCRE_DEBUG if (code > cd->hwm) cd->hwm = code; /* High water info */ #endif - if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */ + if (code > cd->start_workspace + WORK_SIZE_CHECK) /* Check for overrun */ { *errorcodeptr = ERR52; goto FAILED; @@ -2769,7 +2812,7 @@ for (;; ptr++) /* In the real compile phase, just check the workspace used by the forward reference list. */ - else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE) + else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK) { *errorcodeptr = ERR52; goto FAILED; @@ -4353,7 +4396,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ uschar *scode = bracode; do { - if (could_be_empty_branch(scode, ketcode, utf8)) + if (could_be_empty_branch(scode, ketcode, utf8, cd)) { *bracode += OP_SBRA - OP_BRA; break; @@ -4428,7 +4471,12 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; + /* Because we are moving code along, we must ensure that any + pending recursive references are updated. */ + default: + *code = OP_END; + adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm); memmove(tempcode + 1+LINK_SIZE, tempcode, len); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; @@ -5147,6 +5195,11 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ *errorcodeptr = ERR15; goto FAILED; } + + /* Fudge the value of "called" so that when it is inserted as an + offset below, what it actually inserted is the reference number + of the group. */ + called = cd->start_code + recno; PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code); } @@ -5156,7 +5209,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ recursion that could loop for ever, and diagnose that case. */ else if (GET(called, 1) == 0 && - could_be_empty(called, code, bcptr, utf8)) + could_be_empty(called, code, bcptr, utf8, cd)) { *errorcodeptr = ERR40; goto FAILED; @@ -6802,7 +6855,6 @@ if (reqbyte >= 0 && case when building a production library. */ #ifdef PCRE_DEBUG - printf("Length = %d top_bracket = %d top_backref = %d\n", length, re->top_bracket, re->top_backref); |