summary | refs | log | tree | commit | diff
path: root/pcre_compile.c
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-11-10 19:04:34 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-11-10 19:04:34 +0000
commit b79cc767bf7081781e78955af3c986c2119bcdd3 (patch)
tree 583022a943abc9aa76252150bbdba279195cd362 /pcre_compile.c
parent 7de890de6074833fd0b0ed433c69a431cd7bf0cb (diff)
download pcre-b79cc767bf7081781e78955af3c986c2119bcdd3.tar.gz
In /x mode, allow white space before a possessive + character.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1396 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_compile.c')
-rw-r--r-- pcre_compile.c 97
1 files changed, 67 insertions, 30 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 903b466..688efe5 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4446,7 +4446,7 @@ for (;; ptr++)
/* Get next character in the pattern */
c = *ptr;
-
+
/* If we are at the end of a nested substitution, revert to the outer level
string. Nesting only happens one level deep. */
@@ -4548,8 +4548,37 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
}
+ /* Control does not reach here. */
}
+ /* In extended mode, skip white space and comments. We need a loop in order
+ to check for more white space and more comments after a comment. */
+
+ if ((options & PCRE_EXTENDED) != 0)
+ {
+ for (;;)
+ {
+ while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
+ if (c != CHAR_NUMBER_SIGN) break;
+ ptr++;
+ while (*ptr != CHAR_NULL)
+ {
+ if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
+ { /* IS_NEWLINE sets cd->nllen. */
+ ptr += cd->nllen;
+ break;
+ }
+ ptr++;
+#ifdef SUPPORT_UTF
+ if (utf) FORWARDCHAR(ptr);
+#endif
+ }
+ c = *ptr; /* Either NULL or the char after a newline */
+ }
+ }
+
+ /* See if the next thing is a quantifier. */
+
is_quantifier =
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
@@ -4565,42 +4594,21 @@ for (;; ptr++)
previous_callout = NULL;
}
- /* In extended mode, skip white space and comments. */
-
- if ((options & PCRE_EXTENDED) != 0)
- {
- if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue;
- if (c == CHAR_NUMBER_SIGN)
- {
- ptr++;
- while (*ptr != CHAR_NULL)
- {
- if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
- ptr++;
-#ifdef SUPPORT_UTF
- if (utf) FORWARDCHAR(ptr);
-#endif
- }
- if (*ptr != CHAR_NULL) continue;
-
- /* Else fall through to handle end of string */
- c = 0;
- }
- }
-
- /* No auto callout for quantifiers, or while processing property strings that
- are substituted for \w etc in UCP mode. */
+ /* Create auto callout, except for quantifiers, or while processing property
+ strings that are substituted for \w etc in UCP mode. */
if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL)
{
previous_callout = code;
code = auto_callout(code, ptr, cd);
}
+
+ /* Process the next pattern item. */
switch(c)
{
/* ===================================================================*/
- case 0: /* The branch terminates at string end */
+ case CHAR_NULL: /* The branch terminates at string end */
case CHAR_VERTICAL_LINE: /* or | or ) */
case CHAR_RIGHT_PARENTHESIS:
*firstcharptr = firstchar;
@@ -5445,6 +5453,34 @@ for (;; ptr++)
insert something before it. */
tempcode = previous;
+
+ /* Before checking for a possessive quantifier, we must skip over
+ whitespace and comments in extended mode because Perl allows white space at
+ this point. */
+
+ if ((options & PCRE_EXTENDED) != 0)
+ {
+ const pcre_uchar *p = ptr + 1;
+ for (;;)
+ {
+ while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++;
+ if (*p != CHAR_NUMBER_SIGN) break;
+ p++;
+ while (*p != CHAR_NULL)
+ {
+ if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */
+ { /* IS_NEWLINE sets cd->nllen. */
+ p += cd->nllen;
+ break;
+ }
+ p++;
+#ifdef SUPPORT_UTF
+ if (utf) FORWARDCHAR(p);
+#endif
+ } /* Loop for comment characters */
+ } /* Loop for multiple comments */
+ ptr = p - 1; /* Character before the next significant one. */
+ }
/* If the next character is '+', we have a possessive quantifier. This
implies greediness, whatever the setting of the PCRE_UNGREEDY option.
@@ -7752,8 +7788,8 @@ for (;; ptr++)
/* ===================================================================*/
/* Handle a literal character. It is guaranteed not to be whitespace or #
- when the extended flag is set. If we are in UTF-8 mode, it may be a
- multi-byte literal character. */
+ when the extended flag is set. If we are in a UTF mode, it may be a
+ multi-unit literal character. */
default:
NORMAL_CHAR:
@@ -8899,7 +8935,7 @@ else
cd->nl[0] = newline;
}
}
-
+
/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
references to help in deciding whether (.*) can be treated as anchored or not.
*/
@@ -8952,6 +8988,7 @@ outside can help speed up starting point checks. */
ptr += skipatstart;
code = cworkspace;
*code = OP_BRA;
+
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL,
cd, &length);