summaryrefslogtreecommitdiff
path: root/ext/pcre/pcrelib/pcre_compile.c
diff options
context:
space:
mode:
author Nuno Lopes <nlopess@php.net> 2009-04-11 18:57:27 +0000
committer Nuno Lopes <nlopess@php.net> 2009-04-11 18:57:27 +0000
commit 90a2d1979486b2e7fa14be9c3210b9d321c668f8 (patch)
tree 9f67ba72ee6010944487be2790c467dd2ea2cbaf /ext/pcre/pcrelib/pcre_compile.c
parent ff62b87cd6b94c50fbbac88bf4129f6418efe131 (diff)
download php-git-90a2d1979486b2e7fa14be9c3210b9d321c668f8.tar.gz
upgrade PCRE to version 7.9
Diffstat (limited to 'ext/pcre/pcrelib/pcre_compile.c')
-rw-r--r--ext/pcre/pcrelib/pcre_compile.c861
1 files changed, 542 insertions, 319 deletions
diff --git a/ext/pcre/pcrelib/pcre_compile.c b/ext/pcre/pcrelib/pcre_compile.c
index b079d1962f..1e0672c5cd 100644
--- a/ext/pcre/pcrelib/pcre_compile.c
+++ b/ext/pcre/pcrelib/pcre_compile.c
@@ -95,21 +95,56 @@ are simple data values; negative values are for special things like \d and so
on. Zero means further processing is needed (for things like \x), or the escape
is invalid. */
-#ifndef EBCDIC /* This is the "normal" table for ASCII systems */
+#ifndef EBCDIC
+
+/* This is the "normal" table for ASCII systems or for EBCDIC systems running
+in UTF-8 mode. */
+
static const short int escapes[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
- 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
- '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */
--ESC_H, 0, 0, -ESC_K, 0, 0, 0, 0, /* H - O */
--ESC_P, -ESC_Q, -ESC_R, -ESC_S, 0, 0, -ESC_V, -ESC_W, /* P - W */
--ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
- '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */
--ESC_h, 0, 0, -ESC_k, 0, 0, ESC_n, 0, /* h - o */
--ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, -ESC_v, -ESC_w, /* p - w */
- 0, 0, -ESC_z /* x - z */
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ 0, 0,
+ CHAR_COLON, CHAR_SEMICOLON,
+ CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN,
+ CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK,
+ CHAR_COMMERCIAL_AT, -ESC_A,
+ -ESC_B, -ESC_C,
+ -ESC_D, -ESC_E,
+ 0, -ESC_G,
+ -ESC_H, 0,
+ 0, -ESC_K,
+ 0, 0,
+ 0, 0,
+ -ESC_P, -ESC_Q,
+ -ESC_R, -ESC_S,
+ 0, 0,
+ -ESC_V, -ESC_W,
+ -ESC_X, 0,
+ -ESC_Z, CHAR_LEFT_SQUARE_BRACKET,
+ CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET,
+ CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE,
+ CHAR_GRAVE_ACCENT, 7,
+ -ESC_b, 0,
+ -ESC_d, ESC_e,
+ ESC_f, 0,
+ -ESC_h, 0,
+ 0, -ESC_k,
+ 0, 0,
+ ESC_n, 0,
+ -ESC_p, 0,
+ ESC_r, -ESC_s,
+ ESC_tee, 0,
+ -ESC_v, -ESC_w,
+ 0, 0,
+ -ESC_z
};
-#else /* This is the "abnormal" table for EBCDIC systems */
+#else
+
+/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
+
static const short int escapes[] = {
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,
@@ -140,7 +175,9 @@ static const short int escapes[] = {
/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
searched linearly. Put all the names into a single string, in order to reduce
-the number of relocations when a shared library is dynamically linked. */
+the number of relocations when a shared library is dynamically linked. The
+string is built from string macros so that it works in UTF-8 mode on EBCDIC
+platforms. */
typedef struct verbitem {
int len;
@@ -148,13 +185,13 @@ typedef struct verbitem {
} verbitem;
static const char verbnames[] =
- "ACCEPT\0"
- "COMMIT\0"
- "F\0"
- "FAIL\0"
- "PRUNE\0"
- "SKIP\0"
- "THEN";
+ STRING_ACCEPT0
+ STRING_COMMIT0
+ STRING_F0
+ STRING_FAIL0
+ STRING_PRUNE0
+ STRING_SKIP0
+ STRING_THEN;
static const verbitem verbs[] = {
{ 6, OP_ACCEPT },
@@ -176,9 +213,10 @@ length entry. The first three must be alpha, lower, upper, as this is assumed
for handling case independence. */
static const char posix_names[] =
- "alpha\0" "lower\0" "upper\0" "alnum\0" "ascii\0" "blank\0"
- "cntrl\0" "digit\0" "graph\0" "print\0" "punct\0" "space\0"
- "word\0" "xdigit";
+ STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
+ STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
+ STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
+ STRING_word0 STRING_xdigit;
static const uschar posix_name_lengths[] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
@@ -320,7 +358,11 @@ For convenience, we use the same bit definitions as in chartables:
Then we can use ctype_digit and ctype_xdigit in the code. */
-#ifndef EBCDIC /* This is the "normal" case, for ASCII systems */
+#ifndef EBCDIC
+
+/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
+UTF-8 mode. */
+
static const unsigned char digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
@@ -356,7 +398,10 @@ static const unsigned char digitab[] =
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
-#else /* This is the "abnormal" case, for EBCDIC systems */
+#else
+
+/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
+
static const unsigned char digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
@@ -501,9 +546,9 @@ if (c == 0) *errorcodeptr = ERR1;
in a table. A non-zero result is something that can be returned immediately.
Otherwise further processing may be required. */
-#ifndef EBCDIC /* ASCII coding */
-else if (c < '0' || c > 'z') {} /* Not alphanumeric */
-else if ((i = escapes[c - '0']) != 0) c = i;
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+else if (c < CHAR_0 || c > CHAR_z) {} /* Not alphanumeric */
+else if ((i = escapes[c - CHAR_0]) != 0) c = i;
#else /* EBCDIC coding */
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */
@@ -522,11 +567,11 @@ else
/* A number of Perl escapes are not handled by PCRE. We give an explicit
error. */
- case 'l':
- case 'L':
- case 'N':
- case 'u':
- case 'U':
+ case CHAR_l:
+ case CHAR_L:
+ case CHAR_N:
+ case CHAR_u:
+ case CHAR_U:
*errorcodeptr = ERR37;
break;
@@ -546,8 +591,8 @@ else
(possibly recursive) subroutine calls, _not_ backreferences. Just return
the -ESC_g code (cf \k). */
- case 'g':
- if (ptr[1] == '<' || ptr[1] == '\'')
+ case CHAR_g:
+ if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
{
c = -ESC_g;
break;
@@ -555,12 +600,12 @@ else
/* Handle the Perl-compatible cases */
- if (ptr[1] == '{')
+ if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
const uschar *p;
- for (p = ptr+2; *p != 0 && *p != '}'; p++)
- if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
- if (*p != 0 && *p != '}')
+ for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
+ if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
+ if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
{
c = -ESC_k;
break;
@@ -570,7 +615,7 @@ else
}
else braced = FALSE;
- if (ptr[1] == '-')
+ if (ptr[1] == CHAR_MINUS)
{
negated = TRUE;
ptr++;
@@ -579,7 +624,7 @@ else
c = 0;
while ((digitab[ptr[1]] & ctype_digit) != 0)
- c = c * 10 + *(++ptr) - '0';
+ c = c * 10 + *(++ptr) - CHAR_0;
if (c < 0) /* Integer overflow */
{
@@ -587,7 +632,7 @@ else
break;
}
- if (braced && *(++ptr) != '}')
+ if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
{
*errorcodeptr = ERR57;
break;
@@ -624,15 +669,15 @@ else
value is greater than 377, the least significant 8 bits are taken. Inside a
character class, \ followed by a digit is always an octal number. */
- case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9':
+ case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
+ case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
if (!isclass)
{
oldptr = ptr;
- c -= '0';
+ c -= CHAR_0;
while ((digitab[ptr[1]] & ctype_digit) != 0)
- c = c * 10 + *(++ptr) - '0';
+ c = c * 10 + *(++ptr) - CHAR_0;
if (c < 0) /* Integer overflow */
{
*errorcodeptr = ERR61;
@@ -650,7 +695,7 @@ else
generates a binary zero byte and treats the digit as a following literal.
Thus we have to pull back the pointer by one. */
- if ((c = *ptr) >= '8')
+ if ((c = *ptr) >= CHAR_8)
{
ptr--;
c = 0;
@@ -663,10 +708,10 @@ else
to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
than 3 octal digits. */
- case '0':
- c -= '0';
- while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
- c = c * 8 + *(++ptr) - '0';
+ case CHAR_0:
+ c -= CHAR_0;
+ while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
+ c = c * 8 + *(++ptr) - CHAR_0;
if (!utf8 && c > 255) *errorcodeptr = ERR51;
break;
@@ -674,8 +719,8 @@ else
than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
treated as a data character. */
- case 'x':
- if (ptr[1] == '{')
+ case CHAR_x:
+ if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
{
const uschar *pt = ptr + 2;
int count = 0;
@@ -684,19 +729,19 @@ else
while ((digitab[*pt] & ctype_xdigit) != 0)
{
register int cc = *pt++;
- if (c == 0 && cc == '0') continue; /* Leading zeroes */
+ if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
count++;
-#ifndef EBCDIC /* ASCII coding */
- if (cc >= 'a') cc -= 32; /* Convert to upper case */
- c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
#else /* EBCDIC coding */
- if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */
- c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
+ if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
- if (*pt == '}')
+ if (*pt == CHAR_RIGHT_CURLY_BRACKET)
{
if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
ptr = pt;
@@ -712,14 +757,14 @@ else
c = 0;
while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
{
- int cc; /* Some compilers don't like ++ */
- cc = *(++ptr); /* in initializers */
-#ifndef EBCDIC /* ASCII coding */
- if (cc >= 'a') cc -= 32; /* Convert to upper case */
- c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
+ int cc; /* Some compilers don't like */
+ cc = *(++ptr); /* ++ in initializers */
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */
+ c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
#else /* EBCDIC coding */
- if (cc <= 'z') cc += 64; /* Convert to upper case */
- c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
+ if (cc <= CHAR_z) cc += 64; /* Convert to upper case */
+ c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
#endif
}
break;
@@ -728,7 +773,7 @@ else
This coding is ASCII-specific, but then the whole concept of \cx is
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
- case 'c':
+ case CHAR_c:
c = *(++ptr);
if (c == 0)
{
@@ -736,11 +781,11 @@ else
break;
}
-#ifndef EBCDIC /* ASCII coding */
- if (c >= 'a' && c <= 'z') c -= 32;
+#ifndef EBCDIC /* ASCII/UTF-8 coding */
+ if (c >= CHAR_a && c <= CHAR_z) c -= 32;
c ^= 0x40;
#else /* EBCDIC coding */
- if (c >= 'a' && c <= 'z') c += 64;
+ if (c >= CHAR_a && c <= CHAR_z) c += 64;
c ^= 0xC0;
#endif
break;
@@ -802,9 +847,9 @@ if (c == 0) goto ERROR_RETURN;
/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
negation. */
-if (c == '{')
+if (c == CHAR_LEFT_CURLY_BRACKET)
{
- if (ptr[1] == '^')
+ if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
{
*negptr = TRUE;
ptr++;
@@ -813,10 +858,10 @@ if (c == '{')
{
c = *(++ptr);
if (c == 0) goto ERROR_RETURN;
- if (c == '}') break;
+ if (c == CHAR_RIGHT_CURLY_BRACKET) break;
name[i] = c;
}
- if (c !='}') goto ERROR_RETURN;
+ if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
name[i] = 0;
}
@@ -881,15 +926,15 @@ is_counted_repeat(const uschar *p)
{
if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
while ((digitab[*p] & ctype_digit) != 0) p++;
-if (*p == '}') return TRUE;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-if (*p++ != ',') return FALSE;
-if (*p == '}') return TRUE;
+if (*p++ != CHAR_COMMA) return FALSE;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
while ((digitab[*p] & ctype_digit) != 0) p++;
-return (*p == '}');
+return (*p == CHAR_RIGHT_CURLY_BRACKET);
}
@@ -922,7 +967,7 @@ int max = -1;
/* Read the minimum value and do a paranoid check: a negative value indicates
an integer overflow. */
-while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
+while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
if (min < 0 || min > 65535)
{
*errorcodeptr = ERR5;
@@ -932,12 +977,12 @@ if (min < 0 || min > 65535)
/* Read the maximum value if there is one, and again do a paranoid check on its size.
Also, max must not be less than min. */
-if (*p == '}') max = min; else
+if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
{
- if (*(++p) != '}')
+ if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
{
max = 0;
- while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
+ while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
if (max < 0 || max > 65535)
{
*errorcodeptr = ERR5;
@@ -962,47 +1007,116 @@ return p;
/*************************************************
-* Find forward referenced subpattern *
+* Subroutine for finding forward reference *
*************************************************/
-/* This function scans along a pattern's text looking for capturing
+/* This recursive function is called only from find_parens() below. The
+top-level call starts at the beginning of the pattern. All other calls must
+start at a parenthesis. It scans along a pattern's text looking for capturing
subpatterns, and counting them. If it finds a named pattern that matches the
name it is given, it returns its number. Alternatively, if the name is NULL, it
-returns when it reaches a given numbered subpattern. This is used for forward
-references to subpatterns. We know that if (?P< is encountered, the name will
-be terminated by '>' because that is checked in the first pass.
+returns when it reaches a given numbered subpattern. We know that if (?P< is
+encountered, the name will be terminated by '>' because that is checked in the
+first pass. Recursion is used to keep track of subpatterns that reset the
+capturing group numbers - the (?| feature.
Arguments:
- ptr current position in the pattern
+ ptrptr address of the current character pointer (updated)
cd compile background data
name name to seek, or NULL if seeking a numbered subpattern
lorn name length, or subpattern number if name is NULL
xmode TRUE if we are in /x mode
+ count pointer to the current capturing subpattern number (updated)
Returns: the number of the named subpattern, or -1 if not found
*/
static int
-find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
- BOOL xmode)
+find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
+ BOOL xmode, int *count)
{
-const uschar *thisname;
-int count = cd->bracount;
+uschar *ptr = *ptrptr;
+int start_count = *count;
+int hwm_count = start_count;
+BOOL dup_parens = FALSE;
-for (; *ptr != 0; ptr++)
+/* If the first character is a parenthesis, check on the type of group we are
+dealing with. The very first call may not start with a parenthesis. */
+
+if (ptr[0] == CHAR_LEFT_PARENTHESIS)
{
- int term;
+ if (ptr[1] == CHAR_QUESTION_MARK &&
+ ptr[2] == CHAR_VERTICAL_LINE)
+ {
+ ptr += 3;
+ dup_parens = TRUE;
+ }
+
+ /* Handle a normal, unnamed capturing parenthesis */
+
+ else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
+ {
+ *count += 1;
+ if (name == NULL && *count == lorn) return *count;
+ ptr++;
+ }
+
+ /* Handle a condition. If it is an assertion, just carry on so that it
+ is processed as normal. If not, skip to the closing parenthesis of the
+ condition (there can't be any nested parens). */
+
+ else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
+ {
+ ptr += 2;
+ if (ptr[1] != CHAR_QUESTION_MARK)
+ {
+ while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+ if (*ptr != 0) ptr++;
+ }
+ }
+
+ /* We have either (? or (* and not a condition */
+
+ else
+ {
+ ptr += 2;
+ if (*ptr == CHAR_P) ptr++; /* Allow optional P */
+
+ /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
+
+ if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
+ ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
+ {
+ int term;
+ const uschar *thisname;
+ *count += 1;
+ if (name == NULL && *count == lorn) return *count;
+ term = *ptr++;
+ if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
+ thisname = ptr;
+ while (*ptr != term) ptr++;
+ if (name != NULL && lorn == ptr - thisname &&
+ strncmp((const char *)name, (const char *)thisname, lorn) == 0)
+ return *count;
+ }
+ }
+ }
+/* Past any initial parenthesis handling, scan for parentheses or vertical
+bars. */
+
+for (; *ptr != 0; ptr++)
+ {
/* Skip over backslashed characters and also entire \Q...\E */
- if (*ptr == '\\')
+ if (*ptr == CHAR_BACKSLASH)
{
- if (*(++ptr) == 0) return -1;
- if (*ptr == 'Q') for (;;)
+ if (*(++ptr) == 0) goto FAIL_EXIT;
+ if (*ptr == CHAR_Q) for (;;)
{
- while (*(++ptr) != 0 && *ptr != '\\') {};
- if (*ptr == 0) return -1;
- if (*(++ptr) == 'E') break;
+ while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
+ if (*ptr == 0) goto FAIL_EXIT;
+ if (*(++ptr) == CHAR_E) break;
}
continue;
}
@@ -1010,21 +1124,26 @@ for (; *ptr != 0; ptr++)
/* Skip over character classes; this logic must be similar to the way they
are handled for real. If the first character is '^', skip it. Also, if the
first few characters (either before or after ^) are \Q\E or \E we skip them
- too. This makes for compatibility with Perl. */
+ too. This makes for compatibility with Perl. Note the use of STR macros to
+ encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
- if (*ptr == '[')
+ if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
{
BOOL negate_class = FALSE;
for (;;)
{
int c = *(++ptr);
- if (c == '\\')
+ if (c == CHAR_BACKSLASH)
{
- if (ptr[1] == 'E') ptr++;
- else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
- else break;
+ if (ptr[1] == CHAR_E)
+ ptr++;
+ else if (strncmp((const char *)ptr+1,
+ STR_Q STR_BACKSLASH STR_E, 3) == 0)
+ ptr += 3;
+ else
+ break;
}
- else if (!negate_class && c == '^')
+ else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
negate_class = TRUE;
else break;
}
@@ -1032,20 +1151,21 @@ for (; *ptr != 0; ptr++)
/* If the next character is ']', it is a data character that must be
skipped, except in JavaScript compatibility mode. */
- if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
+ if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
+ (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
ptr++;
- while (*(++ptr) != ']')
+ while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
{
if (*ptr == 0) return -1;
- if (*ptr == '\\')
+ if (*ptr == CHAR_BACKSLASH)
{
- if (*(++ptr) == 0) return -1;
- if (*ptr == 'Q') for (;;)
+ if (*(++ptr) == 0) goto FAIL_EXIT;
+ if (*ptr == CHAR_Q) for (;;)
{
- while (*(++ptr) != 0 && *ptr != '\\') {};
- if (*ptr == 0) return -1;
- if (*(++ptr) == 'E') break;
+ while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
+ if (*ptr == 0) goto FAIL_EXIT;
+ if (*(++ptr) == CHAR_E) break;
}
continue;
}
@@ -1055,49 +1175,92 @@ for (; *ptr != 0; ptr++)
/* Skip comments in /x mode */
- if (xmode && *ptr == '#')
+ if (xmode && *ptr == CHAR_NUMBER_SIGN)
{
- while (*(++ptr) != 0 && *ptr != '\n') {};
- if (*ptr == 0) return -1;
+ while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
+ if (*ptr == 0) goto FAIL_EXIT;
continue;
}
- /* An opening parens must now be a real metacharacter */
+ /* Check for the special metacharacters */
- if (*ptr != '(') continue;
- if (ptr[1] != '?' && ptr[1] != '*')
+ if (*ptr == CHAR_LEFT_PARENTHESIS)
{
- count++;
- if (name == NULL && count == lorn) return count;
- continue;
+ int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
+ if (rc > 0) return rc;
+ if (*ptr == 0) goto FAIL_EXIT;
+ }
+
+ else if (*ptr == CHAR_RIGHT_PARENTHESIS)
+ {
+ if (dup_parens && *count < hwm_count) *count = hwm_count;
+ *ptrptr = ptr;
+ return -1;
}
- ptr += 2;
- if (*ptr == 'P') ptr++; /* Allow optional P */
+ else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
+ {
+ if (*count > hwm_count) hwm_count = *count;
+ *count = start_count;
+ }
+ }
+
+FAIL_EXIT:
+*ptrptr = ptr;
+return -1;
+}
+
- /* We have to disambiguate (?<! and (?<= from (?<name> */
- if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&
- *ptr != '\'')
- continue;
- count++;
+/*************************************************
+* Find forward referenced subpattern *
+*************************************************/
+
+/* This function scans along a pattern's text looking for capturing
+subpatterns, and counting them. If it finds a named pattern that matches the
+name it is given, it returns its number. Alternatively, if the name is NULL, it
+returns when it reaches a given numbered subpattern. This is used for forward
+references to subpatterns. We used to be able to start this scan from the
+current compiling point, using the current count value from cd->bracount, and
+do it all in a single loop, but the addition of the possibility of duplicate
+subpattern numbers means that we have to scan from the very start, in order to
+take account of such duplicates, and to use a recursive function to keep track
+of the different types of group.
+
+Arguments:
+ cd compile background data
+ name name to seek, or NULL if seeking a numbered subpattern
+ lorn name length, or subpattern number if name is NULL
+ xmode TRUE if we are in /x mode
+
+Returns: the number of the found subpattern, or -1 if not found
+*/
+
+static int
+find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
+{
+uschar *ptr = (uschar *)cd->start_pattern;
+int count = 0;
+int rc;
- if (name == NULL && count == lorn) return count;
- term = *ptr++;
- if (term == '<') term = '>';
- thisname = ptr;
- while (*ptr != term) ptr++;
- if (name != NULL && lorn == ptr - thisname &&
- strncmp((const char *)name, (const char *)thisname, lorn) == 0)
- return count;
+/* If the pattern does not start with an opening parenthesis, the first call
+to find_parens_sub() will scan right to the end (if necessary). However, if it
+does start with a parenthesis, find_parens_sub() will return when it hits the
+matching closing parens. That is why we have to have a loop. */
+
+for (;;)
+ {
+ rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
+ if (rc > 0 || *ptr++ == 0) break;
}
-return -1;
+return rc;
}
+
/*************************************************
* Find first significant op code *
*************************************************/
@@ -1611,17 +1774,25 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
BOOL empty_branch;
if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */
- /* Scan a closed bracket */
+ /* If a conditional group has only one branch, there is a second, implied,
+ empty branch, so just skip over the conditional, because it could be empty.
+ Otherwise, scan the individual branches of the group. */
- empty_branch = FALSE;
- do
- {
- if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
- empty_branch = TRUE;
+ if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
code += GET(code, 1);
+ else
+ {
+ empty_branch = FALSE;
+ do
+ {
+ if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
+ empty_branch = TRUE;
+ code += GET(code, 1);
+ }
+ while (*code == OP_ALT);
+ if (!empty_branch) return FALSE; /* All branches are non-empty */
}
- while (*code == OP_ALT);
- if (!empty_branch) return FALSE; /* All branches are non-empty */
+
c = *code;
continue;
}
@@ -1823,10 +1994,10 @@ int terminator; /* Don't combine these lines; the Solaris cc */
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */
for (++ptr; *ptr != 0; ptr++)
{
- if (*ptr == '\\' && ptr[1] == ']') ptr++; else
+ if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
{
- if (*ptr == ']') return FALSE;
- if (*ptr == terminator && ptr[1] == ']')
+ if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
+ if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{
*endptr = ptr;
return TRUE;
@@ -2072,7 +2243,7 @@ if ((options & PCRE_EXTENDED) != 0)
for (;;)
{
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
- if (*ptr == '#')
+ if (*ptr == CHAR_NUMBER_SIGN)
{
while (*(++ptr) != 0)
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
@@ -2084,7 +2255,7 @@ if ((options & PCRE_EXTENDED) != 0)
/* If the next item is one that we can handle, get its value. A non-negative
value is a character, a negative value is an escape value. */
-if (*ptr == '\\')
+if (*ptr == CHAR_BACKSLASH)
{
int temperrorcode = 0;
next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
@@ -2109,7 +2280,7 @@ if ((options & PCRE_EXTENDED) != 0)
for (;;)
{
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
- if (*ptr == '#')
+ if (*ptr == CHAR_NUMBER_SIGN)
{
while (*(++ptr) != 0)
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
@@ -2120,8 +2291,9 @@ if ((options & PCRE_EXTENDED) != 0)
/* If the next thing is itself optional, we have to give up. */
-if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)
- return FALSE;
+if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
+ strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
+ return FALSE;
/* Now compare the next item with the previous opcode. If the previous is a
positive single character match, "item" either contains the character or, if
@@ -2559,7 +2731,7 @@ for (;; ptr++)
if (inescq && c != 0)
{
- if (c == '\\' && ptr[1] == 'E')
+ if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
{
inescq = FALSE;
ptr++;
@@ -2585,8 +2757,9 @@ for (;; ptr++)
/* Fill in length of a previous callout, except when the next thing is
a quantifier. */
- is_quantifier = c == '*' || c == '+' || c == '?' ||
- (c == '{' && is_counted_repeat(ptr+1));
+ is_quantifier =
+ c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
+ (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
if (!is_quantifier && previous_callout != NULL &&
after_manual_callout-- <= 0)
@@ -2601,7 +2774,7 @@ for (;; ptr++)
if ((options & PCRE_EXTENDED) != 0)
{
if ((cd->ctypes[c] & ctype_space) != 0) continue;
- if (c == '#')
+ if (c == CHAR_NUMBER_SIGN)
{
while (*(++ptr) != 0)
{
@@ -2626,8 +2799,8 @@ for (;; ptr++)
{
/* ===================================================================*/
case 0: /* The branch terminates at string end */
- case '|': /* or | or ) */
- case ')':
+ case CHAR_VERTICAL_LINE: /* or | or ) */
+ case CHAR_RIGHT_PARENTHESIS:
*firstbyteptr = firstbyte;
*reqbyteptr = reqbyte;
*codeptr = code;
@@ -2649,7 +2822,7 @@ for (;; ptr++)
/* Handle single-character metacharacters. In multiline mode, ^ disables
the setting of any following char as a first character. */
- case '^':
+ case CHAR_CIRCUMFLEX_ACCENT:
if ((options & PCRE_MULTILINE) != 0)
{
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
@@ -2658,7 +2831,7 @@ for (;; ptr++)
*code++ = OP_CIRC;
break;
- case '$':
+ case CHAR_DOLLAR_SIGN:
previous = NULL;
*code++ = OP_DOLL;
break;
@@ -2666,7 +2839,7 @@ for (;; ptr++)
/* There can never be a first char if '.' is first, whatever happens about
repeats. The value of reqbyte doesn't change either. */
- case '.':
+ case CHAR_DOT:
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
zerofirstbyte = firstbyte;
zeroreqbyte = reqbyte;
@@ -2690,7 +2863,7 @@ for (;; ptr++)
In JavaScript compatibility mode, an isolated ']' causes an error. In
default (Perl) mode, it is treated as a data character. */
- case ']':
+ case CHAR_RIGHT_SQUARE_BRACKET:
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
{
*errorcodeptr = ERR64;
@@ -2698,16 +2871,17 @@ for (;; ptr++)
}
goto NORMAL_CHAR;
- case '[':
+ case CHAR_LEFT_SQUARE_BRACKET:
previous = code;
/* PCRE supports POSIX class stuff inside a class. Perl gives an error if
they are encountered at the top level, so we'll do that too. */
- if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
+ if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+ ptr[1] == CHAR_EQUALS_SIGN) &&
check_posix_syntax(ptr, &tempptr))
{
- *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
+ *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
goto FAILED;
}
@@ -2719,13 +2893,17 @@ for (;; ptr++)
for (;;)
{
c = *(++ptr);
- if (c == '\\')
+ if (c == CHAR_BACKSLASH)
{
- if (ptr[1] == 'E') ptr++;
- else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
- else break;
+ if (ptr[1] == CHAR_E)
+ ptr++;
+ else if (strncmp((const char *)ptr+1,
+ STR_Q STR_BACKSLASH STR_E, 3) == 0)
+ ptr += 3;
+ else
+ break;
}
- else if (!negate_class && c == '^')
+ else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
negate_class = TRUE;
else break;
}
@@ -2735,7 +2913,8 @@ for (;; ptr++)
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
[^] must match any character, so generate OP_ALLANY. */
- if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
+ if (c == CHAR_RIGHT_SQUARE_BRACKET &&
+ (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
{
*code++ = negate_class? OP_ALLANY : OP_FAIL;
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
@@ -2800,7 +2979,7 @@ for (;; ptr++)
if (inescq)
{
- if (c == '\\' && ptr[1] == 'E') /* If we are at \E */
+ if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */
{
inescq = FALSE; /* Reset literal state */
ptr++; /* Skip the 'E' */
@@ -2815,23 +2994,23 @@ for (;; ptr++)
[.ch.] and [=ch=] ("collating elements") and fault them, as Perl
5.6 and 5.8 do. */
- if (c == '[' &&
- (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
- check_posix_syntax(ptr, &tempptr))
+ if (c == CHAR_LEFT_SQUARE_BRACKET &&
+ (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+ ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
{
BOOL local_negate = FALSE;
int posix_class, taboffset, tabopt;
register const uschar *cbits = cd->cbits;
uschar pbits[32];
- if (ptr[1] != ':')
+ if (ptr[1] != CHAR_COLON)
{
*errorcodeptr = ERR31;
goto FAILED;
}
ptr += 2;
- if (*ptr == '^')
+ if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
{
local_negate = TRUE;
should_flip_negation = TRUE; /* Note negative special */
@@ -2904,17 +3083,17 @@ for (;; ptr++)
to 'or' into the one we are building. We assume they have more than one
character in them, so set class_charcount bigger than one. */
- if (c == '\\')
+ if (c == CHAR_BACKSLASH)
{
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
if (*errorcodeptr != 0) goto FAILED;
- if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */
- else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */
- else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */
+ if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
+ else if (-c == ESC_X) c = CHAR_X; /* \X is literal X in a class */
+ else if (-c == ESC_R) c = CHAR_R; /* \R is literal R in a class */
else if (-c == ESC_Q) /* Handle start of quoted string */
{
- if (ptr[1] == '\\' && ptr[2] == 'E')
+ if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
{
ptr += 2; /* avoid empty string */
}
@@ -3140,7 +3319,7 @@ for (;; ptr++)
entirely. The code for handling \Q and \E is messy. */
CHECK_RANGE:
- while (ptr[1] == '\\' && ptr[2] == 'E')
+ while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
{
inescq = FALSE;
ptr += 2;
@@ -3150,28 +3329,29 @@ for (;; ptr++)
/* Remember \r or \n */
- if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
+ if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
/* Check for range */
- if (!inescq && ptr[1] == '-')
+ if (!inescq && ptr[1] == CHAR_MINUS)
{
int d;
ptr += 2;
- while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;
+ while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
/* If we hit \Q (not followed by \E) at this point, go into escaped
mode. */
- while (*ptr == '\\' && ptr[1] == 'Q')
+ while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
{
ptr += 2;
- if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }
+ if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
+ { ptr += 2; continue; }
inescq = TRUE;
break;
}
- if (*ptr == 0 || (!inescq && *ptr == ']'))
+ if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
{
ptr = oldptr;
goto LONE_SINGLE_CHARACTER;
@@ -3190,7 +3370,7 @@ for (;; ptr++)
not any of the other escapes. Perl 5.6 treats a hyphen as a literal
in such circumstances. */
- if (!inescq && d == '\\')
+ if (!inescq && d == CHAR_BACKSLASH)
{
d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
if (*errorcodeptr != 0) goto FAILED;
@@ -3200,9 +3380,9 @@ for (;; ptr++)
if (d < 0)
{
- if (d == -ESC_b) d = '\b';
- else if (d == -ESC_X) d = 'X';
- else if (d == -ESC_R) d = 'R'; else
+ if (d == -ESC_b) d = CHAR_BS;
+ else if (d == -ESC_X) d = CHAR_X;
+ else if (d == -ESC_R) d = CHAR_R; else
{
ptr = oldptr;
goto LONE_SINGLE_CHARACTER; /* A few lines below */
@@ -3223,7 +3403,7 @@ for (;; ptr++)
/* Remember \r or \n */
- if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
+ if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
matching, we have to use an XCLASS with extra data items. Caseless
@@ -3370,7 +3550,7 @@ for (;; ptr++)
/* Loop until ']' reached. This "while" is the end of the "do" above. */
- while ((c = *(++ptr)) != 0 && (c != ']' || inescq));
+ while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
if (c == 0) /* Missing terminating ']' */
{
@@ -3515,23 +3695,23 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Various kinds of repeat; '{' is not necessarily a quantifier, but this
has been tested above. */
- case '{':
+ case CHAR_LEFT_CURLY_BRACKET:
if (!is_quantifier) goto NORMAL_CHAR;
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
if (*errorcodeptr != 0) goto FAILED;
goto REPEAT;
- case '*':
+ case CHAR_ASTERISK:
repeat_min = 0;
repeat_max = -1;
goto REPEAT;
- case '+':
+ case CHAR_PLUS:
repeat_min = 1;
repeat_max = -1;
goto REPEAT;
- case '?':
+ case CHAR_QUESTION_MARK:
repeat_min = 0;
repeat_max = 1;
@@ -3566,13 +3746,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
but if PCRE_UNGREEDY is set, it works the other way round. We change the
repeat type to the non-default. */
- if (ptr[1] == '+')
+ if (ptr[1] == CHAR_PLUS)
{
repeat_type = 0; /* Force greedy */
possessive_quantifier = TRUE;
ptr++;
}
- else if (ptr[1] == '?')
+ else if (ptr[1] == CHAR_QUESTION_MARK)
{
repeat_type = greedy_non_default;
ptr++;
@@ -4205,7 +4385,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
lookbehind or option setting or condition or all the other extended
parenthesis forms. */
- case '(':
+ case CHAR_LEFT_PARENTHESIS:
newoptions = options;
skipbytes = 0;
bravalue = OP_CBRA;
@@ -4214,19 +4394,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* First deal with various "verbs" that can be introduced by '*'. */
- if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
+ if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
{
int i, namelen;
const char *vn = verbnames;
const uschar *name = ++ptr;
previous = NULL;
while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
- if (*ptr == ':')
+ if (*ptr == CHAR_COLON)
{
*errorcodeptr = ERR59; /* Not supported */
goto FAILED;
}
- if (*ptr != ')')
+ if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR60;
goto FAILED;
@@ -4251,7 +4431,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Deal with the extended parentheses; all are introduced by '?', and the
appearance of any of them means that this is not a capturing group. */
- else if (*ptr == '?')
+ else if (*ptr == CHAR_QUESTION_MARK)
{
int i, set, unset, namelen;
int *optset;
@@ -4260,9 +4440,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
switch (*(++ptr))
{
- case '#': /* Comment; skip to ket */
+ case CHAR_NUMBER_SIGN: /* Comment; skip to ket */
ptr++;
- while (*ptr != 0 && *ptr != ')') ptr++;
+ while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr == 0)
{
*errorcodeptr = ERR18;
@@ -4272,19 +4452,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case '|': /* Reset capture count for each branch */
+ case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */
reset_bracount = TRUE;
/* Fall through */
/* ------------------------------------------------------------ */
- case ':': /* Non-capturing bracket */
+ case CHAR_COLON: /* Non-capturing bracket */
bravalue = OP_BRA;
ptr++;
break;
/* ------------------------------------------------------------ */
- case '(':
+ case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
/* A condition can be an assertion, a number (referring to a numbered
@@ -4304,7 +4484,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))
+ if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
+ ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
break;
/* Most other conditions use OP_CREF (a couple change to OP_RREF
@@ -4316,7 +4497,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Check for a test for recursion in a named group. */
- if (ptr[1] == 'R' && ptr[2] == '&')
+ if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
{
terminator = -1;
ptr += 2;
@@ -4326,20 +4507,20 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Check for a test for a named group's having been set, using the Perl
syntax (?(<name>) or (?('name') */
- else if (ptr[1] == '<')
+ else if (ptr[1] == CHAR_LESS_THAN_SIGN)
{
- terminator = '>';
+ terminator = CHAR_GREATER_THAN_SIGN;
ptr++;
}
- else if (ptr[1] == '\'')
+ else if (ptr[1] == CHAR_APOSTROPHE)
{
- terminator = '\'';
+ terminator = CHAR_APOSTROPHE;
ptr++;
}
else
{
terminator = 0;
- if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
+ if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
}
/* We now expect to read a name; anything else is an error */
@@ -4359,12 +4540,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
if (recno >= 0)
recno = ((digitab[*ptr] & ctype_digit) != 0)?
- recno * 10 + *ptr - '0' : -1;
+ recno * 10 + *ptr - CHAR_0 : -1;
ptr++;
}
namelen = ptr - name;
- if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')
+ if ((terminator > 0 && *ptr++ != terminator) ||
+ *ptr++ != CHAR_RIGHT_PARENTHESIS)
{
ptr--; /* Error offset */
*errorcodeptr = ERR26;
@@ -4386,7 +4568,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*errorcodeptr = ERR58;
goto FAILED;
}
- recno = (refsign == '-')?
+ recno = (refsign == CHAR_MINUS)?
cd->bracount - recno + 1 : recno +cd->bracount;
if (recno <= 0 || recno > cd->final_bracount)
{
@@ -4417,7 +4599,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Search the pattern for a forward reference */
- else if ((i = find_parens(ptr, cd, name, namelen,
+ else if ((i = find_parens(cd, name, namelen,
(options & PCRE_EXTENDED) != 0)) > 0)
{
PUT2(code, 2+LINK_SIZE, i);
@@ -4438,7 +4620,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Check for (?(R) for recursion. Allow digits after R to specify a
specific group number. */
- else if (*name == 'R')
+ else if (*name == CHAR_R)
{
recno = 0;
for (i = 1; i < namelen; i++)
@@ -4448,7 +4630,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*errorcodeptr = ERR15;
goto FAILED;
}
- recno = recno * 10 + name[i] - '0';
+ recno = recno * 10 + name[i] - CHAR_0;
}
if (recno == 0) recno = RREF_ANY;
code[1+LINK_SIZE] = OP_RREF; /* Change test type */
@@ -4458,7 +4640,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Similarly, check for the (?(DEFINE) "condition", which is always
false. */
- else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)
+ else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
{
code[1+LINK_SIZE] = OP_DEF;
skipbytes = 1;
@@ -4483,16 +4665,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case '=': /* Positive lookahead */
+ case CHAR_EQUALS_SIGN: /* Positive lookahead */
bravalue = OP_ASSERT;
ptr++;
break;
/* ------------------------------------------------------------ */
- case '!': /* Negative lookahead */
+ case CHAR_EXCLAMATION_MARK: /* Negative lookahead */
ptr++;
- if (*ptr == ')') /* Optimize (?!) */
+ if (*ptr == CHAR_RIGHT_PARENTHESIS) /* Optimize (?!) */
{
*code++ = OP_FAIL;
previous = NULL;
@@ -4503,15 +4685,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case '<': /* Lookbehind or named define */
+ case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */
switch (ptr[1])
{
- case '=': /* Positive lookbehind */
+ case CHAR_EQUALS_SIGN: /* Positive lookbehind */
bravalue = OP_ASSERTBACK;
ptr += 2;
break;
- case '!': /* Negative lookbehind */
+ case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */
bravalue = OP_ASSERTBACK_NOT;
ptr += 2;
break;
@@ -4526,22 +4708,22 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case '>': /* One-time brackets */
+ case CHAR_GREATER_THAN_SIGN: /* One-time brackets */
bravalue = OP_ONCE;
ptr++;
break;
/* ------------------------------------------------------------ */
- case 'C': /* Callout - may be followed by digits; */
+ case CHAR_C: /* Callout - may be followed by digits; */
previous_callout = code; /* Save for later completion */
after_manual_callout = 1; /* Skip one item before completing */
*code++ = OP_CALLOUT;
{
int n = 0;
while ((digitab[*(++ptr)] & ctype_digit) != 0)
- n = n * 10 + *ptr - '0';
- if (*ptr != ')')
+ n = n * 10 + *ptr - CHAR_0;
+ if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR39;
goto FAILED;
@@ -4561,14 +4743,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case 'P': /* Python-style named subpattern handling */
- if (*(++ptr) == '=' || *ptr == '>') /* Reference or recursion */
+ case CHAR_P: /* Python-style named subpattern handling */
+ if (*(++ptr) == CHAR_EQUALS_SIGN ||
+ *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */
{
- is_recurse = *ptr == '>';
- terminator = ')';
+ is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
+ terminator = CHAR_RIGHT_PARENTHESIS;
goto NAMED_REF_OR_RECURSE;
}
- else if (*ptr != '<') /* Test for Python-style definition */
+ else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */
{
*errorcodeptr = ERR41;
goto FAILED;
@@ -4578,9 +4761,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
DEFINE_NAME: /* Come here from (?< handling */
- case '\'':
+ case CHAR_APOSTROPHE:
{
- terminator = (*ptr == '<')? '>' : '\'';
+ terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
+ CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
name = ++ptr;
while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
@@ -4654,8 +4838,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case '&': /* Perl recursion/subroutine syntax */
- terminator = ')';
+ case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */
+ terminator = CHAR_RIGHT_PARENTHESIS;
is_recurse = TRUE;
/* Fall through */
@@ -4714,7 +4898,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
recno = GET2(slot, 0);
}
else if ((recno = /* Forward back reference */
- find_parens(ptr, cd, name, namelen,
+ find_parens(cd, name, namelen,
(options & PCRE_EXTENDED) != 0)) <= 0)
{
*errorcodeptr = ERR15;
@@ -4730,18 +4914,18 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* ------------------------------------------------------------ */
- case 'R': /* Recursion */
+ case CHAR_R: /* Recursion */
ptr++; /* Same as (?0) */
/* Fall through */
/* ------------------------------------------------------------ */
- case '-': case '+':
- case '0': case '1': case '2': case '3': case '4': /* Recursion or */
- case '5': case '6': case '7': case '8': case '9': /* subroutine */
+ case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */
+ case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
+ case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
{
const uschar *called;
- terminator = ')';
+ terminator = CHAR_RIGHT_PARENTHESIS;
/* Come here from the \g<...> and \g'...' code (Oniguruma
compatibility). However, the syntax has been checked to ensure that
@@ -4751,7 +4935,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
HANDLE_NUMERICAL_RECURSION:
- if ((refsign = *ptr) == '+')
+ if ((refsign = *ptr) == CHAR_PLUS)
{
ptr++;
if ((digitab[*ptr] & ctype_digit) == 0)
@@ -4760,7 +4944,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
goto FAILED;
}
}
- else if (refsign == '-')
+ else if (refsign == CHAR_MINUS)
{
if ((digitab[ptr[1]] & ctype_digit) == 0)
goto OTHER_CHAR_AFTER_QUERY;
@@ -4769,7 +4953,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
recno = 0;
while((digitab[*ptr] & ctype_digit) != 0)
- recno = recno * 10 + *ptr++ - '0';
+ recno = recno * 10 + *ptr++ - CHAR_0;
if (*ptr != terminator)
{
@@ -4777,7 +4961,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
goto FAILED;
}
- if (refsign == '-')
+ if (refsign == CHAR_MINUS)
{
if (recno == 0)
{
@@ -4791,7 +4975,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
goto FAILED;
}
}
- else if (refsign == '+')
+ else if (refsign == CHAR_PLUS)
{
if (recno == 0)
{
@@ -4824,7 +5008,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
if (called == NULL)
{
- if (find_parens(ptr, cd, NULL, recno,
+ if (find_parens(cd, NULL, recno,
(options & PCRE_EXTENDED) != 0) < 0)
{
*errorcodeptr = ERR15;
@@ -4877,23 +5061,23 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
set = unset = 0;
optset = &set;
- while (*ptr != ')' && *ptr != ':')
+ while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
{
switch (*ptr++)
{
- case '-': optset = &unset; break;
+ case CHAR_MINUS: optset = &unset; break;
- case 'J': /* Record that it changed in the external options */
+ case CHAR_J: /* Record that it changed in the external options */
*optset |= PCRE_DUPNAMES;
cd->external_flags |= PCRE_JCHANGED;
break;
- case 'i': *optset |= PCRE_CASELESS; break;
- case 'm': *optset |= PCRE_MULTILINE; break;
- case 's': *optset |= PCRE_DOTALL; break;
- case 'x': *optset |= PCRE_EXTENDED; break;
- case 'U': *optset |= PCRE_UNGREEDY; break;
- case 'X': *optset |= PCRE_EXTRA; break;
+ case CHAR_i: *optset |= PCRE_CASELESS; break;
+ case CHAR_m: *optset |= PCRE_MULTILINE; break;
+ case CHAR_s: *optset |= PCRE_DOTALL; break;
+ case CHAR_x: *optset |= PCRE_EXTENDED; break;
+ case CHAR_U: *optset |= PCRE_UNGREEDY; break;
+ case CHAR_X: *optset |= PCRE_EXTRA; break;
default: *errorcodeptr = ERR12;
ptr--; /* Correct the offset */
@@ -4927,7 +5111,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
options if this setting actually changes any of them, and reset the
greedy defaults and the case value for firstbyte and reqbyte. */
- if (*ptr == ')')
+ if (*ptr == CHAR_RIGHT_PARENTHESIS)
{
if (code == cd->start_code + 1 + LINK_SIZE &&
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
@@ -5067,7 +5251,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Error if hit end of pattern */
- if (*ptr != ')')
+ if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR14;
goto FAILED;
@@ -5165,7 +5349,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
We can test for values between ESC_b and ESC_Z for the latter; this may
have to change if any new ones are ever created. */
- case '\\':
+ case CHAR_BACKSLASH:
tempptr = ptr;
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
if (*errorcodeptr != 0) goto FAILED;
@@ -5174,8 +5358,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
if (-c == ESC_Q) /* Handle start of quoted string */
{
- if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */
- else inescq = TRUE;
+ if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
+ ptr += 2; /* avoid empty string */
+ else inescq = TRUE;
continue;
}
@@ -5203,7 +5388,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
const uschar *p;
save_hwm = cd->hwm; /* Normally this is set when '(' is read */
- terminator = (*(++ptr) == '<')? '>' : '\'';
+ terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+ CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
/* These two statements stop the compiler from warning about possibly
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
@@ -5215,7 +5401,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Test for a name */
- if (ptr[1] != '+' && ptr[1] != '-')
+ if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
{
BOOL isnumber = TRUE;
for (p = ptr + 1; *p != 0 && *p != terminator; p++)
@@ -5253,10 +5439,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* \k<name> or \k'name' is a back reference by name (Perl syntax).
We also support \k{name} (.NET syntax) */
- if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
+ if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
+ ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
{
is_recurse = FALSE;
- terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
+ terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+ CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
+ CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
goto NAMED_REF_OR_RECURSE;
}
@@ -5359,7 +5548,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
/* Remember if \r or \n were seen */
- if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
+ if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
cd->external_flags |= PCRE_HASCRORLF;
/* Set the first and required bytes appropriately. If no previous first
@@ -5604,7 +5793,7 @@ for (;;)
compile a resetting op-code following, except at the very end of the pattern.
Return leaving the pointer at the terminating char. */
- if (*ptr != '|')
+ if (*ptr != CHAR_VERTICAL_LINE)
{
if (lengthptr == NULL)
{
@@ -5627,7 +5816,7 @@ for (;;)
/* Resetting option if needed */
- if ((options & PCRE_IMS) != oldims && *ptr == ')')
+ if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
{
*code++ = OP_OPT;
*code++ = oldims;
@@ -5809,6 +5998,32 @@ do {
NULL, 0, FALSE);
register int op = *scode;
+ /* If we are at the start of a conditional assertion group, *both* the
+ conditional assertion *and* what follows the condition must satisfy the test
+ for start of line. Other kinds of condition fail. Note that there may be an
+ auto-callout at the start of a condition. */
+
+ if (op == OP_COND)
+ {
+ scode += 1 + LINK_SIZE;
+ if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
+ switch (*scode)
+ {
+ case OP_CREF:
+ case OP_RREF:
+ case OP_DEF:
+ return FALSE;
+
+ default: /* Assertion */
+ if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
+ do scode += GET(scode, 1); while (*scode == OP_ALT);
+ scode += 1 + LINK_SIZE;
+ break;
+ }
+ scode = first_significant_code(scode, NULL, 0, FALSE);
+ op = *scode;
+ }
+
/* Non-capturing brackets */
if (op == OP_BRA)
@@ -5827,8 +6042,10 @@ do {
/* Other brackets */
- else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
- { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }
+ else if (op == OP_ASSERT || op == OP_ONCE)
+ {
+ if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
+ }
/* .* means "start at start or after \n" if it isn't in brackets that
may be referenced. */
@@ -6007,30 +6224,6 @@ if (erroroffset == NULL)
*erroroffset = 0;
-/* Can't support UTF8 unless PCRE has been compiled to include the code. */
-
-#ifdef SUPPORT_UTF8
-utf8 = (options & PCRE_UTF8) != 0;
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
- (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
- {
- errorcode = ERR44;
- goto PCRE_EARLY_ERROR_RETURN2;
- }
-#else
-if ((options & PCRE_UTF8) != 0)
- {
- errorcode = ERR32;
- goto PCRE_EARLY_ERROR_RETURN;
- }
-#endif
-
-if ((options & ~PUBLIC_OPTIONS) != 0)
- {
- errorcode = ERR17;
- goto PCRE_EARLY_ERROR_RETURN;
- }
-
/* Set up pointers to the individual character tables */
if (tables == NULL) tables = _pcre_default_tables;
@@ -6039,28 +6232,40 @@ cd->fcc = tables + fcc_offset;
cd->cbits = tables + cbits_offset;
cd->ctypes = tables + ctypes_offset;
+/* Check that all undefined public option bits are zero */
+
+if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
+ {
+ errorcode = ERR17;
+ goto PCRE_EARLY_ERROR_RETURN;
+ }
+
/* Check for global one-time settings at the start of the pattern, and remember
the offset for later. */
-while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
+while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
+ ptr[skipatstart+1] == CHAR_ASTERISK)
{
int newnl = 0;
int newbsr = 0;
- if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)
+ if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
+ { skipatstart += 7; options |= PCRE_UTF8; continue; }
+
+ if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
- else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
- else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5) == 0)
{ skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
- else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
{ skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
- else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
{ skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
- else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
{ skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
- else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)
+ else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
{ skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
if (newnl != 0)
@@ -6070,6 +6275,24 @@ while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
else break;
}
+/* Can't support UTF8 unless PCRE has been compiled to include the code. */
+
+#ifdef SUPPORT_UTF8
+utf8 = (options & PCRE_UTF8) != 0;
+if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
+ (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
+ {
+ errorcode = ERR44;
+ goto PCRE_EARLY_ERROR_RETURN2;
+ }
+#else
+if ((options & PCRE_UTF8) != 0)
+ {
+ errorcode = ERR32;
+ goto PCRE_EARLY_ERROR_RETURN;
+ }
+#endif
+
/* Check validity of \R options. */
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
@@ -6088,10 +6311,10 @@ current code allows for fixed one- or two-byte sequences, plus "any" and
switch (options & PCRE_NEWLINE_BITS)
{
case 0: newline = NEWLINE; break; /* Build-time default */
- case PCRE_NEWLINE_CR: newline = '\r'; break;
- case PCRE_NEWLINE_LF: newline = '\n'; break;
+ case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
+ case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
case PCRE_NEWLINE_CR+
- PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+ PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
case PCRE_NEWLINE_ANY: newline = -1; break;
case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;