diff options
author | Ilia Alshanetsky <iliaa@php.net> | 2010-01-21 17:49:38 +0000 |
---|---|---|
committer | Ilia Alshanetsky <iliaa@php.net> | 2010-01-21 17:49:38 +0000 |
commit | 50016d9dec9be2a11bbdf195924a6becfb27adbf (patch) | |
tree | ace84671409618cb4d3d9c40218ded5ae97ba50b /ext/pcre/pcrelib/pcre_compile.c | |
parent | 6dda7add548cbd61e93784a6916382f7a7e8879d (diff) | |
download | php-git-50016d9dec9be2a11bbdf195924a6becfb27adbf.tar.gz |
Upgraded bundled PCRE to version 8.01
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r-- | ext/pcre/pcrelib/pcre_compile.c | 104 |
1 files changed, 76 insertions, 28 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c index b197c1b548..eaf3d90ce8 100644 --- a/ext/pcre/pcrelib/pcre_compile.c +++ b/ext/pcre/pcrelib/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -51,10 +51,11 @@ supporting internal functions that are not used by other modules. */ #include "pcre_internal.h" -/* When DEBUG is defined, we need the pcre_printint() function, which is also -used by pcretest. DEBUG is not defined when building a production library. */ +/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is +also used by pcretest. PCRE_DEBUG is not defined when building a production +library. */ -#ifdef DEBUG +#ifdef PCRE_DEBUG #include "pcre_printint.src" #endif @@ -1992,9 +1993,10 @@ static BOOL could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, BOOL utf8) { -while (bcptr != NULL && bcptr->current >= code) +while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE; + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) + return FALSE; bcptr = bcptr->outer; } return TRUE; @@ -2656,7 +2658,7 @@ BOOL utf8 = FALSE; uschar *utf8_char = NULL; #endif -#ifdef DEBUG +#ifdef PCRE_DEBUG if (lengthptr != NULL) DPRINTF((">> start branch\n")); #endif @@ -2715,7 +2717,7 @@ for (;; ptr++) if (lengthptr != NULL) { -#ifdef DEBUG +#ifdef PCRE_DEBUG if (code > cd->hwm) cd->hwm = code; /* High water info */ #endif if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */ @@ -4211,13 +4213,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { /* In the pre-compile phase, we don't actually do the replication. We just adjust the length as if we had. Do some paranoid checks for - potential integer overflow. */ + potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit + integer type when available, otherwise double. */ if (lengthptr != NULL) { int delta = (repeat_min - 1)*length_prevgroup; - if ((double)(repeat_min - 1)*(double)length_prevgroup > - (double)INT_MAX || + if ((INT64_OR_DOUBLE)(repeat_min - 1)* + (INT64_OR_DOUBLE)length_prevgroup > + (INT64_OR_DOUBLE)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -4263,15 +4267,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ just adjust the length as if we had. For each repetition we must add 1 to the length for BRAZERO and for all but the last repetition we must add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some - paranoid checks to avoid integer overflow. */ + paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is + a 64-bit integer type when available, otherwise double. */ if (lengthptr != NULL && repeat_max > 0) { int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ - if ((double)repeat_max * - (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - > (double)INT_MAX || + if ((INT64_OR_DOUBLE)repeat_max * + (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) + > (INT64_OR_DOUBLE)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -5246,7 +5251,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { cd->external_options = newoptions; } - else + else { if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) { @@ -5583,6 +5588,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ if (-c >= ESC_REF) { + open_capitem *oc; recno = -c - ESC_REF; HANDLE_REFERENCE: /* Come here from named backref handling */ @@ -5592,6 +5598,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ PUT2INC(code, 0, recno); cd->backref_map |= (recno < 32)? (1 << recno) : 1; if (recno > cd->top_backref) cd->top_backref = recno; + + /* Check to see if this back reference is recursive, that it, it + is inside the group that it references. A flag is set so that the + group can be made atomic. */ + + for (oc = cd->open_caps; oc != NULL; oc = oc->next) + { + if (oc->number == recno) + { + oc->flag = TRUE; + break; + } + } } /* So are Unicode property matches, if supported. */ @@ -5781,10 +5800,11 @@ int branchfirstbyte, branchreqbyte; int length; int orig_bracount; int max_bracount; +int old_external_options = cd->external_options; branch_chain bc; bc.outer = bcptr; -bc.current = code; +bc.current_branch = code; firstbyte = reqbyte = REQ_UNSET; @@ -5803,13 +5823,15 @@ them global. It tests the value of length for (2 + 2*LINK_SIZE) in the pre-compile phase to find out whether anything has yet been compiled or not. */ /* If this is a capturing subpattern, add to the chain of open capturing items -so that we can detect them if (*ACCEPT) is encountered. */ +so that we can detect them if (*ACCEPT) is encountered. This is also used to +detect groups that contain recursive back references to themselves. */ if (*code == OP_CBRA) { capnumber = GET2(code, 1 + LINK_SIZE); capitem.number = capnumber; capitem.next = cd->open_caps; + capitem.flag = FALSE; cd->open_caps = &capitem; } @@ -5857,6 +5879,15 @@ for (;;) return FALSE; } + /* If the external options have changed during this branch, it means that we + are at the top level, and a leading option setting has been encountered. We + need to re-set the original option values to take account of this so that, + during the pre-compile phase, we know to allow for a re-set at the start of + subsequent branches. */ + + if (old_external_options != cd->external_options) + oldims = cd->external_options & PCRE_IMS; + /* Keep the highest bracket count in case (?| was used and some branch has fewer than the rest. */ @@ -5957,17 +5988,34 @@ for (;;) while (branch_length > 0); } - /* If it was a capturing subpattern, remove it from the chain. */ - - if (capnumber > 0) cd->open_caps = cd->open_caps->next; - /* Fill in the ket */ *code = OP_KET; PUT(code, 1, code - start_bracket); code += 1 + LINK_SIZE; - /* Resetting option if needed */ + /* If it was a capturing subpattern, check to see if it contained any + recursive back references. If so, we must wrap it in atomic brackets. + In any event, remove the block from the chain. */ + + if (capnumber > 0) + { + if (cd->open_caps->flag) + { + memmove(start_bracket + 1 + LINK_SIZE, start_bracket, + code - start_bracket); + *start_bracket = OP_ONCE; + code += 1 + LINK_SIZE; + PUT(start_bracket, 1, code - start_bracket); + *code = OP_KET; + PUT(code, 1, code - start_bracket); + code += 1 + LINK_SIZE; + length += 2 + 2*LINK_SIZE; + } + cd->open_caps = cd->open_caps->next; + } + + /* Reset options if needed. */ if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS) { @@ -6016,7 +6064,7 @@ for (;;) { *code = OP_ALT; PUT(code, 1, code - last_branch); - bc.current = last_branch = code; + bc.current_branch = last_branch = code; code += 1 + LINK_SIZE; } @@ -6432,7 +6480,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && #ifdef SUPPORT_UTF8 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && - (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) + (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0) { errorcode = ERR44; goto PCRE_EARLY_ERROR_RETURN2; @@ -6629,7 +6677,7 @@ if debugging, leave the test till after things are printed out. */ *code++ = OP_END; -#ifndef DEBUG +#ifndef PCRE_DEBUG if (code - codestart > length) errorcode = ERR23; #endif @@ -6753,7 +6801,7 @@ if (reqbyte >= 0 && /* Print out the compiled data if debugging is enabled. This is never the case when building a production library. */ -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("Length = %d top_bracket = %d top_backref = %d\n", length, re->top_bracket, re->top_backref); @@ -6791,7 +6839,7 @@ if (code - codestart > length) if (errorcodeptr != NULL) *errorcodeptr = ERR23; return NULL; } -#endif /* DEBUG */ +#endif /* PCRE_DEBUG */ return (pcre *)re; } |