summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_compile.c
diff options
context:
space:
mode:
authorIlia Alshanetsky <iliaa@php.net>2010-01-21 17:49:38 +0000
committerIlia Alshanetsky <iliaa@php.net>2010-01-21 17:49:38 +0000
commit50016d9dec9be2a11bbdf195924a6becfb27adbf (patch)
treeace84671409618cb4d3d9c40218ded5ae97ba50b /ext/pcre/pcrelib/pcre_compile.c
parent6dda7add548cbd61e93784a6916382f7a7e8879d (diff)
downloadphp-git-50016d9dec9be2a11bbdf195924a6becfb27adbf.tar.gz
Upgraded bundled PCRE to version 8.01
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r--ext/pcre/pcrelib/pcre_compile.c104
1 files changed, 76 insertions, 28 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index b197c1b548..eaf3d90ce8 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2010 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -51,10 +51,11 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
-/* When DEBUG is defined, we need the pcre_printint() function, which is also
-used by pcretest. DEBUG is not defined when building a production library. */
+/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
+also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. */
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#include "pcre_printint.src"
#endif
@@ -1992,9 +1993,10 @@ static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
BOOL utf8)
{
-while (bcptr != NULL && bcptr->current >= code)
+while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+ return FALSE;
bcptr = bcptr->outer;
}
return TRUE;
@@ -2656,7 +2658,7 @@ BOOL utf8 = FALSE;
uschar *utf8_char = NULL;
#endif
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (lengthptr != NULL) DPRINTF((">> start branch\n"));
#endif
@@ -2715,7 +2717,7 @@ for (;; ptr++)
if (lengthptr != NULL)
{
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (code > cd->hwm) cd->hwm = code; /* High water info */
#endif
if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
@@ -4211,13 +4213,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
/* In the pre-compile phase, we don't actually do the replication. We
just adjust the length as if we had. Do some paranoid checks for
- potential integer overflow. */
+ potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+ integer type when available, otherwise double. */
if (lengthptr != NULL)
{
int delta = (repeat_min - 1)*length_prevgroup;
- if ((double)(repeat_min - 1)*(double)length_prevgroup >
- (double)INT_MAX ||
+ if ((INT64_OR_DOUBLE)(repeat_min - 1)*
+ (INT64_OR_DOUBLE)length_prevgroup >
+ (INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -4263,15 +4267,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
just adjust the length as if we had. For each repetition we must add 1
to the length for BRAZERO and for all but the last repetition we must
add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
- paranoid checks to avoid integer overflow. */
+ paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
+ a 64-bit integer type when available, otherwise double. */
if (lengthptr != NULL && repeat_max > 0)
{
int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
2 - 2*LINK_SIZE; /* Last one doesn't nest */
- if ((double)repeat_max *
- (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
- > (double)INT_MAX ||
+ if ((INT64_OR_DOUBLE)repeat_max *
+ (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+ > (INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -5246,7 +5251,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
cd->external_options = newoptions;
}
- else
+ else
{
if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
{
@@ -5583,6 +5588,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (-c >= ESC_REF)
{
+ open_capitem *oc;
recno = -c - ESC_REF;
HANDLE_REFERENCE: /* Come here from named backref handling */
@@ -5592,6 +5598,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
PUT2INC(code, 0, recno);
cd->backref_map |= (recno < 32)? (1 << recno) : 1;
if (recno > cd->top_backref) cd->top_backref = recno;
+
+ /* Check to see if this back reference is recursive, that it, it
+ is inside the group that it references. A flag is set so that the
+ group can be made atomic. */
+
+ for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+ {
+ if (oc->number == recno)
+ {
+ oc->flag = TRUE;
+ break;
+ }
+ }
}
/* So are Unicode property matches, if supported. */
@@ -5781,10 +5800,11 @@ int branchfirstbyte, branchreqbyte;
int length;
int orig_bracount;
int max_bracount;
+int old_external_options = cd->external_options;
branch_chain bc;
bc.outer = bcptr;
-bc.current = code;
+bc.current_branch = code;
firstbyte = reqbyte = REQ_UNSET;
@@ -5803,13 +5823,15 @@ them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
pre-compile phase to find out whether anything has yet been compiled or not. */
/* If this is a capturing subpattern, add to the chain of open capturing items
-so that we can detect them if (*ACCEPT) is encountered. */
+so that we can detect them if (*ACCEPT) is encountered. This is also used to
+detect groups that contain recursive back references to themselves. */
if (*code == OP_CBRA)
{
capnumber = GET2(code, 1 + LINK_SIZE);
capitem.number = capnumber;
capitem.next = cd->open_caps;
+ capitem.flag = FALSE;
cd->open_caps = &capitem;
}
@@ -5857,6 +5879,15 @@ for (;;)
return FALSE;
}
+ /* If the external options have changed during this branch, it means that we
+ are at the top level, and a leading option setting has been encountered. We
+ need to re-set the original option values to take account of this so that,
+ during the pre-compile phase, we know to allow for a re-set at the start of
+ subsequent branches. */
+
+ if (old_external_options != cd->external_options)
+ oldims = cd->external_options & PCRE_IMS;
+
/* Keep the highest bracket count in case (?| was used and some branch
has fewer than the rest. */
@@ -5957,17 +5988,34 @@ for (;;)
while (branch_length > 0);
}
- /* If it was a capturing subpattern, remove it from the chain. */
-
- if (capnumber > 0) cd->open_caps = cd->open_caps->next;
-
/* Fill in the ket */
*code = OP_KET;
PUT(code, 1, code - start_bracket);
code += 1 + LINK_SIZE;
- /* Resetting option if needed */
+ /* If it was a capturing subpattern, check to see if it contained any
+ recursive back references. If so, we must wrap it in atomic brackets.
+ In any event, remove the block from the chain. */
+
+ if (capnumber > 0)
+ {
+ if (cd->open_caps->flag)
+ {
+ memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
+ code - start_bracket);
+ *start_bracket = OP_ONCE;
+ code += 1 + LINK_SIZE;
+ PUT(start_bracket, 1, code - start_bracket);
+ *code = OP_KET;
+ PUT(code, 1, code - start_bracket);
+ code += 1 + LINK_SIZE;
+ length += 2 + 2*LINK_SIZE;
+ }
+ cd->open_caps = cd->open_caps->next;
+ }
+
+ /* Reset options if needed. */
if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
{
@@ -6016,7 +6064,7 @@ for (;;)
{
*code = OP_ALT;
PUT(code, 1, code - last_branch);
- bc.current = last_branch = code;
+ bc.current_branch = last_branch = code;
code += 1 + LINK_SIZE;
}
@@ -6432,7 +6480,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
#ifdef SUPPORT_UTF8
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
- (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
+ (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
{
errorcode = ERR44;
goto PCRE_EARLY_ERROR_RETURN2;
@@ -6629,7 +6677,7 @@ if debugging, leave the test till after things are printed out. */
*code++ = OP_END;
-#ifndef DEBUG
+#ifndef PCRE_DEBUG
if (code - codestart > length) errorcode = ERR23;
#endif
@@ -6753,7 +6801,7 @@ if (reqbyte >= 0 &&
/* Print out the compiled data if debugging is enabled. This is never the
case when building a production library. */
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("Length = %d top_bracket = %d top_backref = %d\n",
length, re->top_bracket, re->top_backref);
@@ -6791,7 +6839,7 @@ if (code - codestart > length)
if (errorcodeptr != NULL) *errorcodeptr = ERR23;
return NULL;
}
-#endif /* DEBUG */
+#endif /* PCRE_DEBUG */
return (pcre *)re;
}