summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-01 06:08:45 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-01 06:08:45 +0000
commitc9fa02b130f1a9da7b17b915e75248f19afb6d7a (patch)
treec7825abfb6f3affd9271e985cd51678d254a972a
parent00cc776fe74e502bc0774ceca2bb3f11283e189a (diff)
downloadpcre-c9fa02b130f1a9da7b17b915e75248f19afb6d7a.tar.gz
Better digit parsing; first_byte and req_byte are renamed to first_char and req_char respectively
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@774 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--pcre_compile.c430
-rw-r--r--pcre_dfa_exec.c57
-rw-r--r--pcre_exec.c60
-rw-r--r--pcre_fullinfo.c4
-rw-r--r--pcre_info.c2
-rw-r--r--pcre_internal.h36
-rw-r--r--pcre_jit_compile.c65
-rw-r--r--pcre_try_flipped.c8
-rw-r--r--pcretest.c30
9 files changed, 385 insertions, 307 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 46d881d..0bdd0fd 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -97,6 +97,10 @@ overrun before it actually does run off the end of the data block. */
#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
+/* Private flags added to firstchar and reqchar. */
+
+#define REQ_CASELESS 0x10000000l /* Indicates caselessness */
+#define REQ_VARY 0x20000000l /* Reqchar followed non-literal item */
/* Table for handling escaped characters in the range '0'-'z'. Positive returns
are simple data values; negative values are for special things like \d and so
@@ -484,12 +488,18 @@ For convenience, we use the same bit definitions as in chartables:
Then we can use ctype_digit and ctype_xdigit in the code. */
+/* Using a simple comparison for decimal numbers rather than a memory read
+is much faster, and the resulting code is simpler (the compiler turns it
+into a subtraction and unsigned comparison). */
+
+#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+
#ifndef EBCDIC
/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
UTF-8 mode. */
-static const unsigned char digitab[] =
+static const pcre_uint8 digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
@@ -528,7 +538,7 @@ static const unsigned char digitab[] =
/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
-static const unsigned char digitab[] =
+static const pcre_uint8 digitab[] =
{
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */
@@ -563,7 +573,7 @@ static const unsigned char digitab[] =
0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */
0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */
-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
+static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */
0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */
@@ -651,15 +661,17 @@ Returns: TRUE or FALSE
static BOOL
is_counted_repeat(const pcre_uchar *p)
{
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;
if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
if (*p++ != CHAR_COMMA) return FALSE;
if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;
return (*p == CHAR_RIGHT_CURLY_BRACKET);
}
@@ -710,11 +722,13 @@ in a table. A non-zero result is something that can be returned immediately.
Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
-else if (c < CHAR_0 || c > CHAR_z) {} /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < CHAR_0 || c > CHAR_z) {}
else if ((i = escapes[c - CHAR_0]) != 0) c = i;
#else /* EBCDIC coding */
-else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
else if ((i = escapes[c - 0x48]) != 0) c = i;
#endif
@@ -740,8 +754,10 @@ else
{
/* In JavaScript, \u must be followed by four hexadecimal numbers.
Otherwise it is a lowercase u letter. */
- if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
- && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+ if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+ && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
+ && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
+ && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
{
c = 0;
for (i = 0; i < 4; ++i)
@@ -797,7 +813,7 @@ else
{
const pcre_uchar *p;
for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
- if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
+ if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
{
c = -ESC_k;
@@ -815,12 +831,21 @@ else
}
else negated = FALSE;
+ /* The integer range is limited by the machine's int representation. */
c = 0;
- while ((digitab[ptr[1]] & ctype_digit) != 0)
+ while (IS_DIGIT(ptr[1]))
+ {
+ if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+ {
+ c = -1;
+ break;
+ }
c = c * 10 + *(++ptr) - CHAR_0;
-
- if (c < 0) /* Integer overflow */
+ }
+ if (((unsigned int)c) > INT_MAX) /* Integer overflow */
{
+ while (IS_DIGIT(ptr[1]))
+ ptr++;
*errorcodeptr = ERR61;
break;
}
@@ -868,11 +893,21 @@ else
if (!isclass)
{
oldptr = ptr;
+ /* The integer range is limited by the machine's int representation. */
c -= CHAR_0;
- while ((digitab[ptr[1]] & ctype_digit) != 0)
+ while (IS_DIGIT(ptr[1]))
+ {
+ if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+ {
+ c = -1;
+ break;
+ }
c = c * 10 + *(++ptr) - CHAR_0;
- if (c < 0) /* Integer overflow */
+ }
+ if (((unsigned int)c) > INT_MAX) /* Integer overflow */
{
+ while (IS_DIGIT(ptr[1]))
+ ptr++;
*errorcodeptr = ERR61;
break;
}
@@ -905,7 +940,7 @@ else
c -= CHAR_0;
while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
c = c * 8 + *(++ptr) - CHAR_0;
- if (!utf8 && c > 255) *errorcodeptr = ERR51;
+ if (!utf8 && c > 0xff) *errorcodeptr = ERR51;
break;
/* \x is complicated. \x{ddd} is a character number which can be greater
@@ -917,7 +952,8 @@ else
{
/* In JavaScript, \x must be followed by two hexadecimal numbers.
Otherwise it is a lowercase x letter. */
- if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+ if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+ && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
{
c = 0;
for (i = 0; i < 2; ++i)
@@ -941,7 +977,7 @@ else
int count = 0;
c = 0;
- while ((digitab[*pt] & ctype_xdigit) != 0)
+ while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
{
register int cc = *pt++;
if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */
@@ -958,7 +994,13 @@ else
if (*pt == CHAR_RIGHT_CURLY_BRACKET)
{
- if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
+#ifdef COMPILE_PCRE8
+ if (c < 0 || count > (utf8? 8:2)) *errorcodeptr = ERR34;
+#else
+#ifdef COMPILE_PCRE16
+ if (c < 0 || count > (utf8? 8:4)) *errorcodeptr = ERR34;
+#endif
+#endif
ptr = pt;
break;
}
@@ -970,7 +1012,7 @@ else
/* Read just a single-byte hex-defined char */
c = 0;
- while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
+ while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
{
int cc; /* Some compilers don't like */
cc = *(++ptr); /* ++ in initializers */
@@ -1169,7 +1211,7 @@ int max = -1;
/* Read the minimum value and do a paranoid check: a negative value indicates
an integer overflow. */
-while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
+while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;
if (min < 0 || min > 65535)
{
*errorcodeptr = ERR5;
@@ -1184,7 +1226,7 @@ if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
{
max = 0;
- while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
+ while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;
if (max < 0 || max > 65535)
{
*errorcodeptr = ERR5;
@@ -3258,8 +3300,8 @@ Arguments:
codeptr points to the pointer to the current code point
ptrptr points to the current pattern pointer
errorcodeptr points to error code variable
- firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
- reqbyteptr set to the last literal character required, else < 0
+ firstcharptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
+ reqcharptr set to the last literal character required, else < 0
bcptr points to current branch chain
cond_depth conditional nesting depth
cd contains pointers to tables etc.
@@ -3272,17 +3314,17 @@ Returns: TRUE on success
static BOOL
compile_branch(int *optionsptr, pcre_uchar **codeptr,
- const pcre_uchar **ptrptr, int *errorcodeptr, int *firstbyteptr,
- int *reqbyteptr, branch_chain *bcptr, int cond_depth, compile_data *cd,
- int *lengthptr)
+ const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,
+ pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,
+ compile_data *cd, int *lengthptr)
{
int repeat_type, op_type;
int repeat_min = 0, repeat_max = 0; /* To please picky compilers */
int bravalue = 0;
int greedy_default, greedy_non_default;
-int firstbyte, reqbyte;
-int zeroreqbyte, zerofirstbyte;
-int req_caseopt, reqvary, tempreqvary;
+pcre_int32 firstchar, reqchar;
+pcre_int32 zeroreqchar, zerofirstchar;
+pcre_int32 req_caseopt, reqvary, tempreqvary;
int options = *optionsptr; /* May change dynamically */
int after_manual_callout = 0;
int length_prevgroup = 0;
@@ -3292,7 +3334,7 @@ pcre_uchar *last_code = code;
pcre_uchar *orig_code = code;
pcre_uchar *tempcode;
BOOL inescq = FALSE;
-BOOL groupsetfirstbyte = FALSE;
+BOOL groupsetfirstchar = FALSE;
const pcre_uchar *ptr = *ptrptr;
const pcre_uchar *tempptr;
const pcre_uchar *nestptr = NULL;
@@ -3331,22 +3373,23 @@ greedy_non_default = greedy_default ^ 1;
/* Initialize no first byte, no required byte. REQ_UNSET means "no char
matching encountered yet". It gets changed to REQ_NONE if we hit something that
-matches a non-fixed char first char; reqbyte just remains unset if we never
+matches a non-fixed char first char; reqchar just remains unset if we never
find one.
When we hit a repeat whose minimum is zero, we may have to adjust these values
to take the zero repeat into account. This is implemented by setting them to
-zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual
+zerofirstchar and zeroreqchar when such a repeat is encountered. The individual
item types that can be repeated set these backoff variables appropriately. */
-firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
+firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;
-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
-according to the current setting of the caseless flag. REQ_CASELESS is a bit
-value > 255. It is added into the firstbyte or reqbyte variables to record the
-case status of the value. This is used only for ASCII characters. */
+/* The variable req_caseopt contains either the REQ_CASELESS value
+or zero, according to the current setting of the caseless flag. The
+REQ_CASELESS flag leaves the lower 28 bits empty. It is added into the
+firstchar or reqchar variables to record the case status of the
+value. This is used only for ASCII characters. */
-req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
/* Switch on next character until the end of the branch */
@@ -3364,8 +3407,8 @@ for (;; ptr++)
int recno;
int refsign;
int skipbytes;
- int subreqbyte;
- int subfirstbyte;
+ int subreqchar;
+ int subfirstchar;
int terminator;
int mclength;
int tempbracount;
@@ -3528,8 +3571,8 @@ for (;; ptr++)
case 0: /* The branch terminates at string end */
case CHAR_VERTICAL_LINE: /* or | or ) */
case CHAR_RIGHT_PARENTHESIS:
- *firstbyteptr = firstbyte;
- *reqbyteptr = reqbyte;
+ *firstcharptr = firstchar;
+ *reqcharptr = reqchar;
*codeptr = code;
*ptrptr = ptr;
if (lengthptr != NULL)
@@ -3553,7 +3596,7 @@ for (;; ptr++)
previous = NULL;
if ((options & PCRE_MULTILINE) != 0)
{
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
*code++ = OP_CIRCM;
}
else *code++ = OP_CIRC;
@@ -3565,12 +3608,12 @@ for (;; ptr++)
break;
/* There can never be a first char if '.' is first, whatever happens about
- repeats. The value of reqbyte doesn't change either. */
+ repeats. The value of reqchar doesn't change either. */
case CHAR_DOT:
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
- zerofirstbyte = firstbyte;
- zeroreqbyte = reqbyte;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+ zerofirstchar = firstchar;
+ zeroreqchar = reqchar;
previous = code;
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
break;
@@ -3644,8 +3687,8 @@ for (;; ptr++)
(cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
{
*code++ = negate_class? OP_ALLANY : OP_FAIL;
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
- zerofirstbyte = firstbyte;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+ zerofirstchar = firstchar;
break;
}
@@ -4335,9 +4378,9 @@ for (;; ptr++)
The optimization throws away the bit map. We turn the item into a
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
Note that OP_NOT[I] does not support multibyte characters. In the positive
- case, it can cause firstbyte to be set. Otherwise, there can be no first
+ case, it can cause firstchar to be set. Otherwise, there can be no first
char if this item is first, whatever repeat count may follow. In the case
- of reqbyte, save the previous value for reinstating. */
+ of reqchar, save the previous value for reinstating. */
#ifdef SUPPORT_UTF
if (class_charcount == 1 && !xclass &&
@@ -4348,14 +4391,14 @@ for (;; ptr++)
if (class_charcount == 1 && !xclass)
#endif
{
- zeroreqbyte = reqbyte;
+ zeroreqchar = reqchar;
/* The OP_NOT[I] opcodes work on one-byte characters only. */
if (negate_class)
{
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
- zerofirstbyte = firstbyte;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+ zerofirstchar = firstchar;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
*code++ = class_lastchar;
break;
@@ -4378,12 +4421,12 @@ for (;; ptr++)
/* The general case - not the one-char optimization. If this is the first
thing in the branch, there can be no first char setting, whatever the
- repeat count. Any reqbyte setting must remain unchanged after any kind of
+ repeat count. Any reqchar setting must remain unchanged after any kind of
repeat. */
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
- zerofirstbyte = firstbyte;
- zeroreqbyte = reqbyte;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+ zerofirstchar = firstchar;
+ zeroreqchar = reqchar;
/* If there are characters with values > 255, we have to compile an
extended class, with its own opcode, unless there was a negated special
@@ -4476,8 +4519,8 @@ for (;; ptr++)
if (repeat_min == 0)
{
- firstbyte = zerofirstbyte; /* Adjust for zero repeat */
- reqbyte = zeroreqbyte; /* Ditto */
+ firstchar = zerofirstchar; /* Adjust for zero repeat */
+ reqchar = zeroreqchar; /* Ditto */
}
/* Remember whether this is a variable length repeat */
@@ -4542,8 +4585,8 @@ for (;; ptr++)
/* If previous was a character match, abolish the item and generate a
repeat item instead. If a char item has a minumum of more than one, ensure
- that it is set in reqbyte - it might not be if a sequence such as x{3} is
- the first thing in a branch because the x will have gone into firstbyte
+ that it is set in reqchar - it might not be if a sequence such as x{3} is
+ the first thing in a branch because the x will have gone into firstchar
instead. */
if (*previous == OP_CHAR || *previous == OP_CHARI)
@@ -4572,7 +4615,7 @@ for (;; ptr++)
{
c = code[-1];
- if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
+ if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
}
/* If the repetition is unlimited, it pays to see if the next thing on
@@ -4971,7 +5014,7 @@ for (;; ptr++)
else
{
- if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
+ if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
for (i = 1; i < repeat_min; i++)
{
pcre_uchar *hc;
@@ -5274,7 +5317,7 @@ for (;; ptr++)
}
/* In all case we no longer have a previous item. We also set the
- "follows varying string" flag for subsequently encountered reqbytes if
+ "follows varying string" flag for subsequently encountered reqchars if
it isn't already set and we have just passed a varying length item. */
END_REPEAT:
@@ -5352,8 +5395,8 @@ for (;; ptr++)
}
*code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
- /* Do not set firstbyte after *ACCEPT */
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+ /* Do not set firstchar after *ACCEPT */
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
}
/* Handle other cases with/without an argument */
@@ -5506,8 +5549,7 @@ for (;; ptr++)
while ((cd->ctypes[*ptr] & ctype_word) != 0)
{
if (recno >= 0)
- recno = ((digitab[*ptr] & ctype_digit) != 0)?
- recno * 10 + *ptr - CHAR_0 : -1;
+ recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1;
ptr++;
}
namelen = (int)(ptr - name);
@@ -5597,7 +5639,7 @@ for (;; ptr++)
recno = 0;
for (i = 1; i < namelen; i++)
{
- if ((digitab[name[i]] & ctype_digit) == 0)
+ if (!IS_DIGIT(name[i]))
{
*errorcodeptr = ERR15;
goto FAILED;
@@ -5697,8 +5739,9 @@ for (;; ptr++)
*code++ = OP_CALLOUT;
{
int n = 0;
- while ((digitab[*(++ptr)] & ctype_digit) != 0)
- n = n * 10 + *ptr - CHAR_0;
+ ptr++;
+ while(IS_DIGIT(*ptr))
+ n = n * 10 + *ptr++ - CHAR_0;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR39;
@@ -5981,7 +6024,7 @@ for (;; ptr++)
if ((refsign = *ptr) == CHAR_PLUS)
{
ptr++;
- if ((digitab[*ptr] & ctype_digit) == 0)
+ if (!IS_DIGIT(*ptr))
{
*errorcodeptr = ERR63;
goto FAILED;
@@ -5989,13 +6032,13 @@ for (;; ptr++)
}
else if (refsign == CHAR_MINUS)
{
- if ((digitab[ptr[1]] & ctype_digit) == 0)
+ if (!IS_DIGIT(ptr[1]))
goto OTHER_CHAR_AFTER_QUERY;
ptr++;
}
recno = 0;
- while((digitab[*ptr] & ctype_digit) != 0)
+ while(IS_DIGIT(*ptr))
recno = recno * 10 + *ptr++ - CHAR_0;
if (*ptr != terminator)
@@ -6093,7 +6136,7 @@ for (;; ptr++)
/* Can't determine a first byte now */
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
continue;
@@ -6150,7 +6193,7 @@ for (;; ptr++)
both phases.
If we are not at the pattern start, reset the greedy defaults and the
- case value for firstbyte and reqbyte. */
+ case value for firstchar and reqchar. */
if (*ptr == CHAR_RIGHT_PARENTHESIS)
{
@@ -6163,7 +6206,7 @@ for (;; ptr++)
{
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+ req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
}
/* Change options at this level, and pass them back for use
@@ -6226,8 +6269,8 @@ for (;; ptr++)
skipbytes, /* Skip over bracket number */
cond_depth +
((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */
- &subfirstbyte, /* For possible first char */
- &subreqbyte, /* For possible last char */
+ &subfirstchar, /* For possible first char */
+ &subreqchar, /* For possible last char */
bcptr, /* Current branch chain */
cd, /* Tables block */
(lengthptr == NULL)? NULL : /* Actual compile phase */
@@ -6278,7 +6321,7 @@ for (;; ptr++)
}
/* A "normal" conditional group. If there is just one branch, we must not
- make use of its firstbyte or reqbyte, because this is equivalent to an
+ make use of its firstchar or reqchar, because this is equivalent to an
empty second branch. */
else
@@ -6288,7 +6331,7 @@ for (;; ptr++)
*errorcodeptr = ERR27;
goto FAILED;
}
- if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
+ if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;
}
}
@@ -6332,55 +6375,55 @@ for (;; ptr++)
/* Handle updating of the required and first characters for other types of
group. Update for normal brackets of all kinds, and conditions with two
branches (see code above). If the bracket is followed by a quantifier with
- zero repeat, we have to back off. Hence the definition of zeroreqbyte and
- zerofirstbyte outside the main loop so that they can be accessed for the
+ zero repeat, we have to back off. Hence the definition of zeroreqchar and
+ zerofirstchar outside the main loop so that they can be accessed for the
back off. */
- zeroreqbyte = reqbyte;
- zerofirstbyte = firstbyte;
- groupsetfirstbyte = FALSE;
+ zeroreqchar = reqchar;
+ zerofirstchar = firstchar;
+ groupsetfirstchar = FALSE;
if (bravalue >= OP_ONCE)
{
- /* If we have not yet set a firstbyte in this branch, take it from the
+ /* If we have not yet set a firstchar in this branch, take it from the
subpattern, remembering that it was set here so that a repeat of more
- than one can replicate it as reqbyte if necessary. If the subpattern has
- no firstbyte, set "none" for the whole branch. In both cases, a zero
- repeat forces firstbyte to "none". */
+ than one can replicate it as reqchar if necessary. If the subpattern has
+ no firstchar, set "none" for the whole branch. In both cases, a zero
+ repeat forces firstchar to "none". */
- if (firstbyte == REQ_UNSET)
+ if (firstchar == REQ_UNSET)
{
- if (subfirstbyte >= 0)
+ if (subfirstchar >= 0)
{
- firstbyte = subfirstbyte;
- groupsetfirstbyte = TRUE;
+ firstchar = subfirstchar;
+ groupsetfirstchar = TRUE;
}
- else firstbyte = REQ_NONE;
- zerofirstbyte = REQ_NONE;
+ else firstchar = REQ_NONE;
+ zerofirstchar = REQ_NONE;
}
- /* If firstbyte was previously set, convert the subpattern's firstbyte
- into reqbyte if there wasn't one, using the vary flag that was in
+ /* If firstchar was previously set, convert the subpattern's firstchar
+ into reqchar if there wasn't one, using the vary flag that was in
existence beforehand. */
- else if (subfirstbyte >= 0 && subreqbyte < 0)
- subreqbyte = subfirstbyte | tempreqvary;
+ else if (subfirstchar >= 0 && subreqchar < 0)
+ subreqchar = subfirstchar | tempreqvary;
/* If the subpattern set a required byte (or set a first byte that isn't
really the first byte - see above), set it. */
- if (subreqbyte >= 0) reqbyte = subreqbyte;
+ if (subreqchar >= 0) reqchar = subreqchar;
}
- /* For a forward assertion, we take the reqbyte, if set. This can be
+ /* For a forward assertion, we take the reqchar, if set. This can be
helpful if the pattern that follows the assertion doesn't set a different
- char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte
+ char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
for an assertion, however because it leads to incorrect effect for patterns
- such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead
- of a firstbyte. This is overcome by a scan at the end if there's no
- firstbyte, looking for an asserted first char. */
+ such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
+ of a firstchar. This is overcome by a scan at the end if there's no
+ firstchar, looking for an asserted first char. */
- else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
+ else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;
break; /* End of processing '(' */
@@ -6413,13 +6456,13 @@ for (;; ptr++)
/* For metasequences that actually match a character, we disable the
setting of a first character if it hasn't already been set. */
- if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
- firstbyte = REQ_NONE;
+ if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
+ firstchar = REQ_NONE;
/* Set values to reset to if this is followed by a zero repeat. */
- zerofirstbyte = firstbyte;
- zeroreqbyte = reqbyte;
+ zerofirstchar = firstchar;
+ zeroreqchar = reqchar;
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
is a subroutine call by number (Oniguruma syntax). In fact, the value
@@ -6470,7 +6513,7 @@ for (;; ptr++)
/* Test a signed number in angle brackets or quotes. */
p = ptr + 2;
- while ((digitab[*p] & ctype_digit) != 0) p++;
+ while (IS_DIGIT(*p)) p++;
if (*p != terminator)
{
*errorcodeptr = ERR57;
@@ -6498,7 +6541,7 @@ for (;; ptr++)
goto NAMED_REF_OR_RECURSE;
}
- /* Back references are handled specially; must disable firstbyte if
+ /* Back references are handled specially; must disable firstchar if
not set to cope with cases like (?=(\w+))\1: which would otherwise set
':' later. */
@@ -6508,7 +6551,7 @@ for (;; ptr++)
recno = -c - ESC_REF;
HANDLE_REFERENCE: /* Come here from named backref handling */
- if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+ if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
previous = code;
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
PUT2INC(code, 0, recno);
@@ -6631,34 +6674,34 @@ for (;; ptr++)
/* Set the first and required bytes appropriately. If no previous first
byte, set it from this character, but revert to none on a zero repeat.
- Otherwise, leave the firstbyte value alone, and don't change it on a zero
+ Otherwise, leave the firstchar value alone, and don't change it on a zero
repeat. */
- if (firstbyte == REQ_UNSET)
+ if (firstchar == REQ_UNSET)
{
- zerofirstbyte = REQ_NONE;
- zeroreqbyte = reqbyte;
+ zerofirstchar = REQ_NONE;
+ zeroreqchar = reqchar;
- /* If the character is more than one byte long, we can set firstbyte
+ /* If the character is more than one byte long, we can set firstchar
only if it is not to be matched caselessly. */
if (mclength == 1 || req_caseopt == 0)
{
- firstbyte = mcbuffer[0] | req_caseopt;
- if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
+ firstchar = mcbuffer[0] | req_caseopt;
+ if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;
}
- else firstbyte = reqbyte = REQ_NONE;
+ else firstchar = reqchar = REQ_NONE;
}
- /* firstbyte was previously set; we can set reqbyte only if the length is
+ /* firstchar was previously set; we can set reqchar only if the length is
1 or the matching is caseful. */
else
{
- zerofirstbyte = firstbyte;
- zeroreqbyte = reqbyte;
+ zerofirstchar = firstchar;
+ zeroreqchar = reqchar;
if (mclength == 1 || req_caseopt == 0)
- reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
+ reqchar = code[-1] | req_caseopt | cd->req_varyopt;
}
break; /* End of literal character handling */
@@ -6698,8 +6741,8 @@ Arguments:
reset_bracount TRUE to reset the count for each branch
skipbytes skip this many bytes at start (for brackets and OP_COND)
cond_depth depth of nesting for conditional subpatterns
- firstbyteptr place to put the first required character, or a negative number
- reqbyteptr place to put the last required character, or a negative number
+ firstcharptr place to put the first required character, or a negative number
+ reqcharptr place to put the last required character, or a negative number
bcptr pointer to the chain of currently open branches
cd points to the data block with tables pointers etc.
lengthptr NULL during the real compile phase
@@ -6711,8 +6754,8 @@ Returns: TRUE on success
static BOOL
compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
- int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
- compile_data *cd, int *lengthptr)
+ int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,
+ branch_chain *bcptr, compile_data *cd, int *lengthptr)
{
const pcre_uchar *ptr = *ptrptr;
pcre_uchar *code = *codeptr;
@@ -6721,8 +6764,8 @@ pcre_uchar *start_bracket = code;
pcre_uchar *reverse_count = NULL;
open_capitem capitem;
int capnumber = 0;
-int firstbyte, reqbyte;
-int branchfirstbyte, branchreqbyte;
+pcre_int32 firstchar, reqchar;
+pcre_int32 branchfirstchar, branchreqchar;
int length;
int orig_bracount;
int max_bracount;
@@ -6731,7 +6774,7 @@ branch_chain bc;
bc.outer = bcptr;
bc.current_branch = code;
-firstbyte = reqbyte = REQ_UNSET;
+firstchar = reqchar = REQ_UNSET;
/* Accumulate the length for use in the pre-compile phase. Start with the
length of the BRA and KET and any extra bytes that are required at the
@@ -6790,8 +6833,8 @@ for (;;)
/* Now compile the branch; in the pre-compile phase its length gets added
into the length. */
- if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
- &branchreqbyte, &bc, cond_depth, cd,
+ if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
+ &branchreqchar, &bc, cond_depth, cd,
(lengthptr == NULL)? NULL : &length))
{
*ptrptr = ptr;
@@ -6807,43 +6850,43 @@ for (;;)
if (lengthptr == NULL)
{
- /* If this is the first branch, the firstbyte and reqbyte values for the
+ /* If this is the first branch, the firstchar and reqchar values for the
branch become the values for the regex. */
if (*last_branch != OP_ALT)
{
- firstbyte = branchfirstbyte;
- reqbyte = branchreqbyte;
+ firstchar = branchfirstchar;
+ reqchar = branchreqchar;
}
- /* If this is not the first branch, the first char and reqbyte have to
+ /* If this is not the first branch, the first char and reqchar have to
match the values from all the previous branches, except that if the
- previous value for reqbyte didn't have REQ_VARY set, it can still match,
+ previous value for reqchar didn't have REQ_VARY set, it can still match,
and we set REQ_VARY for the regex. */
else
{
- /* If we previously had a firstbyte, but it doesn't match the new branch,
- we have to abandon the firstbyte for the regex, but if there was
- previously no reqbyte, it takes on the value of the old firstbyte. */
+ /* If we previously had a firstchar, but it doesn't match the new branch,
+ we have to abandon the firstchar for the regex, but if there was
+ previously no reqchar, it takes on the value of the old firstchar. */
- if (firstbyte >= 0 && firstbyte != branchfirstbyte)
+ if (firstchar >= 0 && firstchar != branchfirstchar)
{
- if (reqbyte < 0) reqbyte = firstbyte;
- firstbyte = REQ_NONE;
+ if (reqchar < 0) reqchar = firstchar;
+ firstchar = REQ_NONE;
}
- /* If we (now or from before) have no firstbyte, a firstbyte from the
- branch becomes a reqbyte if there isn't a branch reqbyte. */
+ /* If we (now or from before) have no firstchar, a firstchar from the
+ branch becomes a reqchar if there isn't a branch reqchar. */
- if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
- branchreqbyte = branchfirstbyte;
+ if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)
+ branchreqchar = branchfirstchar;
- /* Now ensure that the reqbytes match */
+ /* Now ensure that the reqchars match */
- if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
- reqbyte = REQ_NONE;
- else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */
+ if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))
+ reqchar = REQ_NONE;
+ else reqchar |= branchreqchar; /* To "or" REQ_VARY */
}
/* If lookbehind, check that this branch matches a fixed-length string, and
@@ -6933,8 +6976,8 @@ for (;;)
*codeptr = code;
*ptrptr = ptr;
- *firstbyteptr = firstbyte;
- *reqbyteptr = reqbyte;
+ *firstcharptr = firstchar;
+ *reqcharptr = reqchar;
if (lengthptr != NULL)
{
if (OFLOW_MAX - *lengthptr < length)
@@ -7313,7 +7356,8 @@ pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
{
real_pcre *re;
int length = 1; /* For final END opcode */
-int firstbyte, reqbyte, newline;
+pcre_int32 firstchar, reqchar;
+int newline;
int errorcode = 0;
int skipatstart = 0;
BOOL utf8;
@@ -7541,7 +7585,7 @@ ptr += skipatstart;
code = cworkspace;
*code = OP_BRA;
(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
- FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);
+ FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
@@ -7578,8 +7622,8 @@ re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
re->dummy1 = 0;
-re->first_byte = 0;
-re->req_byte = 0;
+re->first_char = 0;
+re->req_char = 0;
re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
re->name_entry_size = cd->name_entry_size;
re->name_count = cd->names_found;
@@ -7615,12 +7659,12 @@ ptr = (const pcre_uchar *)pattern + skipatstart;
code = (pcre_uchar *)codestart;
*code = OP_BRA;
(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
- &firstbyte, &reqbyte, NULL, cd, NULL);
+ &firstchar, &reqchar, NULL, cd, NULL);
re->top_bracket = cd->bracount;
re->top_backref = cd->top_backref;
re->flags = cd->external_flags;
-if (cd->had_accept) reqbyte = REQ_NONE; /* Must disable after (*ACCEPT) */
+if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */
/* If not reached end of pattern on success, there's an excess bracket. */
@@ -7726,13 +7770,21 @@ if ((re->options & PCRE_ANCHORED) == 0)
re->options |= PCRE_ANCHORED;
else
{
- if (firstbyte < 0)
- firstbyte = find_firstassertedchar(codestart, FALSE);
- if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */
+ if (firstchar < 0)
+ firstchar = find_firstassertedchar(codestart, FALSE);
+ if (firstchar >= 0) /* Remove caseless flag for non-caseable chars */
{
- int ch = firstbyte & 255;
- re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
- cd->fcc[ch] == ch)? ch : firstbyte;
+#ifdef COMPILE_PCRE8
+ re->first_char = firstchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+ re->first_char = firstchar & 0xffff;
+#endif
+#endif
+ if ((firstchar & REQ_CASELESS) != 0 && MAX_255(re->first_char)
+ && cd->fcc[re->first_char] != re->first_char)
+ re->flags |= PCRE_FCH_CASELESS;
+
re->flags |= PCRE_FIRSTSET;
}
else if (is_startline(codestart, 0, cd->backref_map))
@@ -7744,12 +7796,20 @@ if ((re->options & PCRE_ANCHORED) == 0)
variable length item in the regex. Remove the caseless flag for non-caseable
bytes. */
-if (reqbyte >= 0 &&
- ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
+if (reqchar >= 0 &&
+ ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
{
- int ch = reqbyte & 255;
- re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
- cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
+#ifdef COMPILE_PCRE8
+ re->req_char = reqchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+ re->req_char = reqchar & 0xffff;
+#endif
+#endif
+ if ((reqchar & REQ_CASELESS) != 0 && MAX_255(re->req_char)
+ && cd->fcc[re->req_char] != re->req_char)
+ re->flags |= PCRE_RCH_CASELESS;
+
re->flags |= PCRE_REQCHSET;
}
@@ -7764,19 +7824,19 @@ printf("Options=%08x\n", re->options);
if ((re->flags & PCRE_FIRSTSET) != 0)
{
- int ch = re->first_byte & 255;
- const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
- "" : " (caseless)";
- if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
+ pcre_uchar ch = re->first_char;
+ const char *caseless =
+ ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
+ if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
else printf("First char = \\x%02x%s\n", ch, caseless);
}
if ((re->flags & PCRE_REQCHSET) != 0)
{
- int ch = re->req_byte & 255;
- const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
- "" : " (caseless)";
- if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
+ pcre_uchar ch = re->req_char;
+ const char *caseless =
+ ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
+ if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
else printf("Req char = \\x%02x%s\n", ch, caseless);
}
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 0793897..8fed9b3 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -3013,13 +3013,14 @@ pcre_study_data internal_study;
const pcre_study_data *study = NULL;
real_pcre internal_re;
-const pcre_uint8 *req_byte_ptr;
+const pcre_uchar *req_char_ptr;
const pcre_uint8 *start_bits = NULL;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
int newline;
/* Plausibility checks */
@@ -3069,7 +3070,7 @@ if (re->magic_number != MAGIC_NUMBER)
current_subject = (const unsigned char *)subject + start_offset;
end_subject = (const unsigned char *)subject + length;
-req_byte_ptr = current_subject - 1;
+req_char_ptr = current_subject - 1;
#ifdef SUPPORT_UTF8
utf8 = (re->options & PCRE_UTF8) != 0;
@@ -3189,9 +3190,10 @@ if (!anchored)
{
if ((re->flags & PCRE_FIRSTSET) != 0)
{
- first_byte = re->first_byte & 255;
- if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
- first_byte = lcc[first_byte];
+ has_first_char = TRUE;
+ first_char = first_char2 = re->first_char;
+ if ((re->flags & PCRE_FCH_CASELESS) != 0)
+ first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
}
else
{
@@ -3206,9 +3208,10 @@ character" set. */
if ((re->flags & PCRE_REQCHSET) != 0)
{
- req_byte = re->req_byte & 255;
- req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
- req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
+ has_req_char = TRUE;
+ req_char = req_char2 = re->req_char;
+ if ((re->flags & PCRE_RCH_CASELESS) != 0)
+ req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
}
/* Call the main matching function, looping for a non-anchored regex after a
@@ -3254,17 +3257,17 @@ for (;;)
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{
- /* Advance to a known first byte. */
+ /* Advance to a known first char. */
- if (first_byte >= 0)
+ if (has_first_char)
{
- if (first_byte_caseless)
+ if (first_char != first_char2)
while (current_subject < end_subject &&
- lcc[*current_subject] != first_byte)
+ *current_subject != first_char && *current_subject != first_char2)
current_subject++;
else
while (current_subject < end_subject &&
- *current_subject != first_byte)
+ *current_subject != first_char)
current_subject++;
}
@@ -3344,8 +3347,8 @@ for (;;)
(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;
- /* If req_byte is set, we know that that character must appear in the
- subject for the match to succeed. If the first character is set, req_byte
+ /* If req_char is set, we know that that character must appear in the
+ subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match
point. This optimization can save a huge amount of work in patterns with
nested unlimited repeats that aren't going to match. Writing separate
@@ -3357,28 +3360,28 @@ for (;;)
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
string... so we don't do this when the string is sufficiently long. */
- if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
+ if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
{
- register const pcre_uchar *p = current_subject + ((first_byte >= 0)? 1 : 0);
+ register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
- if (p > req_byte_ptr)
+ if (p > req_char_ptr)
{
- if (req_byte_caseless)
+ if (req_char != req_char2)
{
while (p < end_subject)
{
register int pp = *p++;
- if (pp == req_byte || pp == req_byte2) { p--; break; }
+ if (pp == req_char || pp == req_char2) { p--; break; }
}
}
else
{
while (p < end_subject)
{
- if (*p++ == req_byte) { p--; break; }
+ if (*p++ == req_char) { p--; break; }
}
}
@@ -3391,7 +3394,7 @@ for (;;)
found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */
- req_byte_ptr = p;
+ req_char_ptr = p;
}
}
}
diff --git a/pcre_exec.c b/pcre_exec.c
index e532513..778a301 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -5964,17 +5964,18 @@ pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,
#endif
{
int rc, ocount, arg_offset_max;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
int newline;
BOOL using_temporary_offsets = FALSE;
BOOL anchored;
BOOL startline;
BOOL firstline;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
BOOL utf8;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
match_data match_block;
match_data *md = &match_block;
const pcre_uint8 *tables;
@@ -5982,7 +5983,7 @@ const pcre_uint8 *start_bits = NULL;
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
PCRE_PUCHAR end_subject;
PCRE_PUCHAR start_partial = NULL;
-PCRE_PUCHAR req_byte_ptr = start_match - 1;
+PCRE_PUCHAR req_char_ptr = start_match - 1;
pcre_study_data internal_study;
const pcre_study_data *study;
@@ -6252,7 +6253,7 @@ if (md->offset_vector != NULL)
md->offset_vector[0] = md->offset_vector[1] = -1;
}
-/* Set up the first character to match, if available. The first_byte value is
+/* Set up the first character to match, if available. The first_char value is
never set for an anchored regular expression, but the anchoring may be forced
at run time, so we have to test for anchoring. The first char may be unset for
an unanchored pattern, of course. If there's no first char and the pattern was
@@ -6262,9 +6263,10 @@ if (!anchored)
{
if ((re->flags & PCRE_FIRSTSET) != 0)
{
- first_byte = re->first_byte & 255;
- if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
- first_byte = md->lcc[first_byte];
+ has_first_char = TRUE;
+ first_char = first_char2 = re->first_char;
+ if ((re->flags & PCRE_FCH_CASELESS) != 0)
+ first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
}
else
if (!startline && study != NULL &&
@@ -6277,14 +6279,13 @@ character" set. */
if ((re->flags & PCRE_REQCHSET) != 0)
{
- req_byte = re->req_byte & 255;
- req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
- req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
+ has_req_char = TRUE;
+ req_char = req_char2 = re->req_char;
+ if ((re->flags & PCRE_RCH_CASELESS) != 0)
+ req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
}
-
-
/* ==========================================================================*/
/* Loop for handling unanchored repeated matching attempts; for anchored regexs
@@ -6327,15 +6328,16 @@ for(;;)
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{
- /* Advance to a unique first byte if there is one. */
+ /* Advance to a unique first char if there is one. */
- if (first_byte >= 0)
+ if (has_first_char)
{
- if (first_byte_caseless)
- while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+ if (first_char != first_char2)
+ while (start_match < end_subject &&
+ *start_match != first_char && *start_match != first_char2)
start_match++;
else
- while (start_match < end_subject && *start_match != first_byte)
+ while (start_match < end_subject && *start_match != first_char)
start_match++;
}
@@ -6418,8 +6420,8 @@ for(;;)
break;
}
- /* If req_byte is set, we know that that character must appear in the
- subject for the match to succeed. If the first character is set, req_byte
+ /* If req_char is set, we know that that character must appear in the
+ subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match point.
This optimization can save a huge amount of backtracking in patterns with
nested unlimited repeats that aren't going to match. Writing separate code
@@ -6432,28 +6434,28 @@ for(;;)
32-megabyte string... so we don't do this when the string is sufficiently
long. */
- if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
+ if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
{
- register PCRE_PUCHAR p = start_match + ((first_byte >= 0)? 1 : 0);
+ register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */
- if (p > req_byte_ptr)
+ if (p > req_char_ptr)
{
- if (req_byte_caseless)
+ if (req_char != req_char2)
{
while (p < end_subject)
{
register int pp = *p++;
- if (pp == req_byte || pp == req_byte2) { p--; break; }
+ if (pp == req_char || pp == req_char2) { p--; break; }
}
}
else
{
while (p < end_subject)
{
- if (*p++ == req_byte) { p--; break; }
+ if (*p++ == req_char) { p--; break; }
}
}
@@ -6470,7 +6472,7 @@ for(;;)
found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */
- req_byte_ptr = p;
+ req_char_ptr = p;
}
}
}
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 7e7e238..6c89121 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -110,7 +110,7 @@ switch (what)
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
- ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+ ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
@@ -137,7 +137,7 @@ switch (what)
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
- ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
+ ((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
break;
case PCRE_INFO_NAMEENTRYSIZE:
diff --git a/pcre_info.c b/pcre_info.c
index 1362bc1..9211df4 100644
--- a/pcre_info.c
+++ b/pcre_info.c
@@ -85,7 +85,7 @@ if (re->magic_number != MAGIC_NUMBER)
}
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
if (first_byte != NULL)
- *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+ *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
return re->top_bracket;
}
diff --git a/pcre_internal.h b/pcre_internal.h
index b9f8dd4..9dbaf05 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -230,15 +230,26 @@ by "configure". */
/* All character handling must be done as unsigned characters. Otherwise there
are problems with top-bit-set characters and functions such as isspace().
However, we leave the interface to the outside world as char * or short *,
-because that should make things easier for callers. We define a short type
-for the current character representation (either 8 or 16 bit) to save lots
-of typing. I tried "uchar", but it causes problems on Digital Unix, where
-it is defined in sys/types, so use "uschar" instead. */
+because that should make things easier for callers. This character type is
+called pcre_uchar.
+
+The IN_UCHARS macro multiplies its argument by the byte size of the current
+pcre_uchar type. Useful for memcpy and similar operations, which require the
+byte size of their input/output buffers.
+
+The MAX_255 macro checks whether its pcre_uchar input is less than 256.
+
+The TABLE_GET macro is designed for accessing elements of tables that contain
+exactly 256 items. When the character type can represent more than 256
+values, a range check is needed before accessing these tables.
+*/
#ifdef COMPILE_PCRE8
typedef unsigned char pcre_uchar;
#define IN_UCHARS(x) (x)
+#define MAX_255(c) 1
+#define TABLE_GET(c, table, default) ((table)[c])
#else
@@ -248,8 +259,11 @@ typedef unsigned char pcre_uchar;
pcre.h(.in) and disable (comment out) this message. */
#error Warning: PCRE_SCHAR16 is not a 16 bit data type.
#endif
+
typedef pcre_uint16 pcre_uchar;
#define IN_UCHARS(x) ((x) << 1)
+#define MAX_255(c) ((c) <= 255u)
+#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#else
#error Unsupported compiling mode
@@ -693,12 +707,14 @@ the restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */
-#define PCRE_FIRSTSET 0x0002 /* first_byte is set */
+#define PCRE_FIRSTSET 0x0002 /* first_char is set */
#define PCRE_REQCHSET 0x0004 /* req_byte is set */
#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */
#define PCRE_JCHANGED 0x0010 /* j option used in regex */
#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */
#define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */
+#define PCRE_FCH_CASELESS 0x0080 /* caseless first char */
+#define PCRE_RCH_CASELESS 0x0100 /* caseless requested char */
/* Flags for the "extra" block produced by pcre_study(). */
@@ -747,12 +763,6 @@ req_byte match. */
#define REQ_BYTE_MAX 1000
-/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
-variable-length repeat, or a anything other than literal characters. */
-
-#define REQ_CASELESS 0x0100 /* indicates caselessness */
-#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
-
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
environments where these macros are defined elsewhere. Unfortunately, there
is no way to do the same for the typedef. */
@@ -1801,8 +1811,8 @@ typedef struct real_pcre {
pcre_uint16 dummy1; /* For future use */
pcre_uint16 top_bracket;
pcre_uint16 top_backref;
- pcre_uint16 first_byte;
- pcre_uint16 req_byte;
+ pcre_uint16 first_char; /* Starting character */
+ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 7a2c41d..03833e0 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -375,7 +375,7 @@ enum {
/* Max limit of recursions. */
#define CALL_LIMIT (5 * sizeof(sljit_w))
/* Last known position of the requested byte. */
-#define REQ_BYTE_PTR (6 * sizeof(sljit_w))
+#define REQ_CHAR_PTR (6 * sizeof(sljit_w))
/* End pointer of the first line. */
#define FIRSTLINE_END (7 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
@@ -1279,7 +1279,7 @@ if (common->utf8)
else
#endif
c = *cc;
-return common->fcc[c] != c;
+return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
@@ -1295,7 +1295,7 @@ if (common->utf8 && c > 127)
#endif
}
#endif
-return common->fcc[c];
+return TABLE_GET(c, common->fcc, c);
}
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
@@ -1728,13 +1728,13 @@ if (newlinecheck)
return mainloop;
}
-static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)
+static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar firstchar, BOOL caseless, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *leave;
struct sljit_jump *found;
-pcre_uint16 oc, bit;
+pcre_uchar oc, bit;
if (firstline)
{
@@ -1744,23 +1744,24 @@ if (firstline)
start = LABEL();
leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
-if ((firstbyte & REQ_CASELESS) == 0)
- found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);
+oc = firstchar;
+if (caseless)
+ oc = TABLE_GET(firstchar, common->fcc, firstchar);
+if (firstchar == oc)
+ found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstchar);
else
{
- firstbyte &= 0xff;
- oc = common->fcc[firstbyte];
- bit = firstbyte ^ oc;
+ bit = firstchar ^ oc;
if (ispowerof2(bit))
{
OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
- found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);
+ found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstchar | bit);
}
else
{
- OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstchar);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
@@ -1915,7 +1916,7 @@ if (firstline)
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
}
-static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)
+static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar reqchar, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
@@ -1924,14 +1925,14 @@ struct sljit_jump *alreadyfound;
struct sljit_jump *found;
struct sljit_jump *foundoc = NULL;
struct sljit_jump *notfound;
-pcre_uint16 oc, bit;
+pcre_uchar oc, bit;
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
-if (has_firstbyte)
+if (has_firstchar)
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
else
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
@@ -1940,21 +1941,22 @@ loop = LABEL();
notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
-if ((reqbyte & REQ_CASELESS) == 0)
- found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);
+oc = reqchar;
+if (caseless)
+ oc = TABLE_GET(reqchar, common->fcc, reqchar);
+if (reqchar == oc)
+ found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
else
{
- reqbyte &= 0xff;
- oc = common->fcc[reqbyte];
- bit = reqbyte ^ oc;
+ bit = reqchar ^ oc;
if (ispowerof2(bit))
{
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
- found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);
+ found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar | bit);
}
else
{
- found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);
+ found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqchar);
foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
}
}
@@ -1964,7 +1966,7 @@ JUMPTO(SLJIT_JUMP, loop);
JUMPHERE(found);
if (foundoc)
JUMPHERE(foundoc);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
JUMPHERE(alreadyfound);
JUMPHERE(toolong);
return notfound;
@@ -3092,16 +3094,16 @@ switch(type)
case OP_CHAR:
case OP_CHARI:
- length = IN_UCHARS(1);
+ length = 1;
#ifdef SUPPORT_UTF8
if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
#endif
if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
{
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
- context.length = length;
+ context.length = IN_UCHARS(length);
context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
context.ucharptr = 0;
@@ -3307,7 +3309,6 @@ if (context.length > 0)
context.ucharptr = 0;
#endif
do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
-sljit_emit_op0(compiler, SLJIT_NOP);
return cc;
}
@@ -6291,7 +6292,7 @@ sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
if ((re->flags & PCRE_REQCHSET) != 0)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
@@ -6309,14 +6310,14 @@ if ((re->options & PCRE_ANCHORED) == 0)
mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
/* Forward search if possible. */
if ((re->flags & PCRE_FIRSTSET) != 0)
- fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0);
+ fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
else if ((re->flags & PCRE_STARTLINE) != 0)
fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
}
if ((re->flags & PCRE_REQCHSET) != 0)
- reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0);
+ reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
/* Store the current STR_PTR in OVECTOR(0). */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
diff --git a/pcre_try_flipped.c b/pcre_try_flipped.c
index 7309876..d09a10f 100644
--- a/pcre_try_flipped.c
+++ b/pcre_try_flipped.c
@@ -113,10 +113,10 @@ internal_re->top_bracket =
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
internal_re->top_backref =
(pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
-internal_re->first_byte =
- (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
-internal_re->req_byte =
- (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
+internal_re->first_char =
+ (pcre_uint16)byteflip(re->first_char, sizeof(re->first_char));
+internal_re->req_char =
+ (pcre_uint16)byteflip(re->req_char, sizeof(re->req_char));
internal_re->name_table_offset =
(pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
internal_re->name_entry_size =
diff --git a/pcretest.c b/pcretest.c
index 147e332..7826fcd 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1920,10 +1920,10 @@ while (!done)
(pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
rre->top_backref =
(pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
- rre->first_byte =
- (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
- rre->req_byte =
- (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
+ rre->first_char =
+ (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
+ rre->req_char =
+ (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
sizeof(rre->name_table_offset));
rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
@@ -2079,13 +2079,14 @@ while (!done)
}
else
{
- int ch = first_char & 255;
- const char *caseless = ((first_char & REQ_CASELESS) == 0)?
+ const char *caseless =
+ ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
"" : " (caseless)";
- if (PRINTHEX(ch))
- fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
+
+ if (PRINTHEX(first_char))
+ fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
else
- fprintf(outfile, "First char = %d%s\n", ch, caseless);
+ fprintf(outfile, "First char = %d%s\n", first_char, caseless);
}
if (need_char < 0)
@@ -2094,13 +2095,14 @@ while (!done)
}
else
{
- int ch = need_char & 255;
- const char *caseless = ((need_char & REQ_CASELESS) == 0)?
+ const char *caseless =
+ ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
"" : " (caseless)";
- if (PRINTHEX(ch))
- fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
+
+ if (PRINTHEX(need_char))
+ fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
else
- fprintf(outfile, "Need char = %d%s\n", ch, caseless);
+ fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
}
/* Don't output study size; at present it is in any case a fixed