summary refs log tree commit diff
diff options
context:
space:
mode:
author zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-11 18:07:25 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-11 18:07:25 +0000
commit 02b9094df724302cd24f71f6a28ec3df318cec71 (patch)
tree 550804928e374af17e464536f206369cbc7ec4d8
parent d277aeef628cc77e6daafe2771970319476d2585 (diff)
download pcre-02b9094df724302cd24f71f6a28ec3df318cec71.tar.gz
Optimization fixes for ranges containing only a single character
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@798 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre_compile.c 70
-rw-r--r-- pcre_dfa_exec.c 29
-rw-r--r-- pcre_exec.c 65
-rw-r--r-- pcre_internal.h 9
-rw-r--r-- pcre_jit_test.c 43
-rw-r--r-- testdata/testoutput10 20
-rw-r--r-- testdata/testoutput2 4
-rw-r--r-- testdata/testoutput5 12
8 files changed, 160 insertions, 92 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 3fa7c67..2be0936 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3413,7 +3413,8 @@ for (;; ptr++)
BOOL is_quantifier;
BOOL is_recurse;
BOOL reset_bracount;
- int class_charcount;
+ int class_has_8bitchar;
+ int class_single_char;
int class_lastchar;
int newoptions;
int recno;
@@ -3710,11 +3711,13 @@ for (;; ptr++)
should_flip_negation = FALSE;
- /* Keep a count of chars with values < 256 so that we can optimize the case
- of just a single character (as long as it's < 256). However, For higher
- valued UTF-8 characters, we don't yet do any optimization. */
+ /* For optimization purposes, we track some properties of the class.
+ class_has_8bitchar will be non-zero if the class contains at least one
+ character < 256. class_single_char will be 1 if the class contains only
+ a single character. */
- class_charcount = 0;
+ class_has_8bitchar = 0;
+ class_single_char = 0;
class_lastchar = -1;
/* Initialize the 32-char bit map to all zeros. We build the map in a
@@ -3870,16 +3873,20 @@ for (;; ptr++)
for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
ptr = tempptr + 1;
- class_charcount = 10; /* Set > 1; assumes more than 1 per class */
+ /* Every class contains at least one < 256 character. */
+ class_has_8bitchar = 1;
+ /* Every class contains at least two characters. */
+ class_single_char = 2;
continue; /* End of POSIX syntax handling */
}
/* Backslash may introduce a single character, or it may introduce one
of the specials, which just set a flag. The sequence \b is a special
case. Inside a class (and only there) it is treated as backspace. We
- assume that other escapes have more than one character in them, so set
- class_charcount bigger than one. Unrecognized escapes fall through and
- are either treated as literal characters (by default), or are faulted if
+ assume that other escapes have more than one character in them, so
+ speculatively increase both class_has_8bitchar and class_single_char.
+ Unrecognized escapes fall through and are either treated
+ as literal characters (by default), or are faulted if
PCRE_EXTRA is set. */
if (c == CHAR_BACKSLASH)
@@ -3902,7 +3909,10 @@ for (;; ptr++)
if (c < 0)
{
register const pcre_uint8 *cbits = cd->cbits;
- class_charcount += 2; /* Greater than 1 is what matters */
+ /* Every class contains at least two < 256 characters. */
+ class_has_8bitchar++;
+ /* Every class contains at least two characters. */
+ class_single_char += 2;
switch (-c)
{
@@ -3915,7 +3925,7 @@ for (;; ptr++)
case ESC_SU:
nestptr = ptr;
ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */
- class_charcount -= 2; /* Undo! */
+ class_has_8bitchar--; /* Undo! */
continue;
#endif
case ESC_d:
@@ -4081,7 +4091,7 @@ for (;; ptr++)
XCL_PROP : XCL_NOTPROP;
*class_uchardata++ = ptype;
*class_uchardata++ = pdata;
- class_charcount -= 2; /* Not a < 256 character */
+ class_has_8bitchar--; /* Undo! */
continue;
}
#endif
@@ -4095,14 +4105,15 @@ for (;; ptr++)
*errorcodeptr = ERR7;
goto FAILED;
}
- class_charcount -= 2; /* Undo the default count from above */
- c = *ptr; /* Get the final character and fall through */
+ class_has_8bitchar--; /* Undo the speculative increase. */
+ class_single_char -= 2; /* Undo the speculative increase. */
+ c = *ptr; /* Get the final character and fall through */
break;
}
}
/* Fall through if we have a single character (c >= 0). This may be
- greater than 256 mode. */
+ greater than 256. */
} /* End of backslash handling */
@@ -4195,6 +4206,10 @@ for (;; ptr++)
if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
+ /* Since we found a character range, single character optimizations
+ cannot be done anymore. */
+ class_single_char = 2;
+
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
matching, we have to use an XCLASS with extra data items. Caseless
matching for characters > 127 is available only if UCP support is
@@ -4323,8 +4338,7 @@ for (;; ptr++)
/* We use the bit map for 8 bit mode, or when the characters fall
partially or entirely to [0-255] ([0-127] for UCP) ranges. */
- class_charcount += d - c + 1;
- class_lastchar = d;
+ class_has_8bitchar = 1;
/* We can save a bit of time by skipping this in the pre-compile. */
@@ -4347,8 +4361,11 @@ for (;; ptr++)
LONE_SINGLE_CHARACTER:
- /* Handle a character that cannot go in the bit map */
+ /* Only the value of 1 matters for class_single_char. */
+ if (class_single_char < 2) class_single_char++;
+ class_lastchar = c;
+ /* Handle a character that cannot go in the bit map */
#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
#elif defined SUPPORT_UTF
@@ -4396,14 +4413,13 @@ for (;; ptr++)
#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Handle a single-byte character */
{
+ class_has_8bitchar = 1;
classbits[c/8] |= (1 << (c&7));
if ((options & PCRE_CASELESS) != 0)
{
c = cd->fcc[c]; /* flip case */
classbits[c/8] |= (1 << (c&7));
}
- class_charcount++;
- class_lastchar = c;
}
}
@@ -4443,15 +4459,15 @@ for (;; ptr++)
of reqchar, save the previous value for reinstating. */
#ifdef SUPPORT_UTF
- if (class_charcount == 1 && !xclass &&
- (!utf || !negate_class || class_lastchar < 128))
+ if (class_single_char == 1 && (!utf || !negate_class
+ || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
#else
- if (class_charcount == 1)
+ if (class_single_char == 1)
#endif
{
zeroreqchar = reqchar;
- /* The OP_NOT[I] opcodes work on one-byte characters only. */
+ /* The OP_NOT[I] opcodes work on single characters only. */
if (negate_class)
{
@@ -4466,7 +4482,7 @@ for (;; ptr++)
then we can handle this with the normal one-character code. */
#ifdef SUPPORT_UTF
- if (utf && class_lastchar > 127)
+ if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
else
#endif
@@ -4510,7 +4526,7 @@ for (;; ptr++)
/* If the map is required, move up the extra data to make room for it;
otherwise just move the code pointer to the end of the extra data. */
- if (class_charcount > 0)
+ if (class_has_8bitchar > 0)
{
*code++ |= XCL_MAP;
memmove(code + (32 / sizeof(pcre_uchar)), code,
@@ -6686,7 +6702,7 @@ for (;; ptr++)
handle it as a data character. */
#ifdef SUPPORT_UTF
- if (utf && c > 127)
+ if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
mclength = PRIV(ord2utf)(c, mcbuffer);
else
#endif
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 58197ce..2b48eda 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -128,22 +128,27 @@ static const pcre_uint8 coptable[] = {
1, /* noti */
/* Positive single-char repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
- 3, 3, 3, /* upto, minupto, exact */
- 1, 1, 1, 3, /* *+, ++, ?+, upto+ */
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
+ 1+IMM2_SIZE, /* exact */
+ 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
- 3, 3, 3, /* upto I, minupto I, exact I */
- 1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
+ 1+IMM2_SIZE, /* exact I */
+ 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
/* Negative single-char repeats - only for chars < 256 */
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
- 3, 3, 3, /* NOT upto, minupto, exact */
- 1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
+ 1+IMM2_SIZE, /* NOT exact */
+ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
- 3, 3, 3, /* NOT upto I, minupto I, exact I */
- 1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
+ 1+IMM2_SIZE, /* NOT exact I */
+ 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
/* Positive type repeats */
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
- 3, 3, 3, /* Type upto, minupto, exact */
- 1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */
+ 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
+ 1+IMM2_SIZE, /* Type exact */
+ 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */
@@ -296,7 +301,7 @@ Returns: nothing
*/
static void
-pchars(unsigned char *p, int length, FILE *f)
+pchars(const pcre_uchar *p, int length, FILE *f)
{
int c;
while (length-- > 0)
@@ -582,7 +587,7 @@ for (;;)
#ifdef PCRE_DEBUG
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
- pchars((pcre_uchar *)ptr, strlen((char *)ptr), stdout);
+ pchars(ptr, STRLEN_UC(ptr), stdout);
printf("\"\n");
printf("%.*sActive states: ", rlevel*2-2, SP);
diff --git a/pcre_exec.c b/pcre_exec.c
index 9aa07a7..5d85e4b 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -3085,7 +3085,10 @@ for (;;)
if (fc < 128)
{
- if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+ if (md->lcc[fc]
+ != TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH);
+ ecode++;
+ eptr++;
}
/* Otherwise we must pick up the subject character */
@@ -3316,7 +3319,7 @@ for (;;)
if (op >= OP_STARI) /* Caseless */
{
#ifdef COMPILE_PCRE8
- /* fc must be < 128 */
+ /* fc must be < 128 if UTF is enabled. */
foc = md->fcc[fc];
#else
#ifdef SUPPORT_UTF
@@ -3459,11 +3462,25 @@ for (;;)
GETCHARINCTEST(c, eptr);
if (op == OP_NOTI) /* The caseless case */
{
-#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
- if (c < 256)
-#endif
- c = md->lcc[c];
- if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
+ register int ch, och;
+ ch = *ecode++;
+#ifdef COMPILE_PCRE8
+ /* ch must be < 128 if UTF is enabled. */
+ och = md->fcc[ch];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+ if (utf && ch > 127)
+ och = UCD_OTHERCASE(ch);
+#else
+ if (utf && ch > 127)
+ och = ch;
+#endif /* SUPPORT_UCP */
+ else
+#endif /* SUPPORT_UTF */
+ och = TABLE_GET(ch, md->fcc, ch);
+#endif /* COMPILE_PCRE8 */
+ if (ch == c || och == c) MRRETURN(MATCH_NOMATCH);
}
else /* Caseful */
{
@@ -3562,7 +3579,22 @@ for (;;)
if (op >= OP_NOTSTARI) /* Caseless */
{
- fc = TABLE_GET(fc, md->lcc, fc);
+#ifdef COMPILE_PCRE8
+ /* fc must be < 128 if UTF is enabled. */
+ foc = md->fcc[fc];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+ if (utf && fc > 127)
+ foc = UCD_OTHERCASE(fc);
+#else
+ if (utf && fc > 127)
+ foc = fc;
+#endif /* SUPPORT_UCP */
+ else
+#endif /* SUPPORT_UTF */
+ foc = TABLE_GET(fc, md->fcc, fc);
+#endif /* COMPILE_PCRE8 */
#ifdef SUPPORT_UTF
if (utf)
@@ -3576,8 +3608,7 @@ for (;;)
MRRETURN(MATCH_NOMATCH);
}
GETCHARINC(d, eptr);
- if (d < 256) d = md->lcc[d];
- if (fc == d) MRRETURN(MATCH_NOMATCH);
+ if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH);
}
}
else
@@ -3591,7 +3622,8 @@ for (;;)
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+ if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH);
+ eptr++;
}
}
@@ -3614,8 +3646,7 @@ for (;;)
MRRETURN(MATCH_NOMATCH);
}
GETCHARINC(d, eptr);
- if (d < 256) d = md->lcc[d];
- if (fc == d) MRRETURN(MATCH_NOMATCH);
+ if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH);
}
}
else
@@ -3632,7 +3663,8 @@ for (;;)
SCHECK_PARTIAL();
MRRETURN(MATCH_NOMATCH);
}
- if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+ if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH);
+ eptr++;
}
}
/* Control never gets here */
@@ -3657,8 +3689,7 @@ for (;;)
break;
}
GETCHARLEN(d, eptr, len);
- if (d < 256) d = md->lcc[d];
- if (fc == d) break;
+ if (fc == d || foc == d) break;
eptr += len;
}
if (possessive) continue;
@@ -3681,7 +3712,7 @@ for (;;)
SCHECK_PARTIAL();
break;
}
- if (fc == md->lcc[*eptr]) break;
+ if (fc == *eptr || foc == *eptr) break;
eptr++;
}
if (possessive) continue;
diff --git a/pcre_internal.h b/pcre_internal.h
index 9a20e73..fa0fb8b 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -532,6 +532,7 @@ UTF support is omitted, we don't even define them. */
#ifndef SUPPORT_UTF
+/* #define MAX_VALUE_FOR_SINGLE_CHAR */
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
/* #define NOT_FIRSTCHAR(c) */
@@ -554,6 +555,10 @@ from the tables whose names start with PRIV(utf8_table). They were rewritten by
a user so as not to use loops, because in some environments this gives a
significant performance advantage, and it seems never to do any harm. */
+/* The largest code point that can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 127
+
/* Tests whether the code point needs extra characters to decode. */
#define HAS_EXTRALEN(c) ((c) >= 0xc0)
@@ -721,6 +726,10 @@ because almost all calls are already within a block of UTF-8 only code. */
#ifdef COMPILE_PCRE16
+/* The largest code point that can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 65535
+
/* Tests whether the code point needs extra characters to decode. */
#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index d2c2027..a1fd47b 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -290,6 +290,10 @@ static struct regression_test_case regression_test_cases[] = {
{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
{ CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
+ { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
+ { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
+ { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
+ { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
/* Basic character sets. */
{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
@@ -626,6 +630,9 @@ static struct regression_test_case regression_test_cases[] = {
{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
+ { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
+ { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
+ { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
/* Deep recursion. */
{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
@@ -736,9 +743,9 @@ static int regression_tests(void)
{
struct regression_test_case *current = regression_test_cases;
const char *error;
- int i, err_offs, is_succesful;
+ int i, err_offs, is_successful;
int total = 0;
- int succesful = 0;
+ int successful = 0;
int counter = 0;
#ifdef SUPPORT_PCRE8
pcre *re8;
@@ -889,7 +896,7 @@ static int regression_tests(void)
/* If F_DIFF is set, just run the test, but do not compare the results.
Segfaults can still be captured. */
- is_succesful = 1;
+ is_successful = 1;
if (!(current->start_offset & F_DIFF)) {
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
@@ -898,7 +905,7 @@ static int regression_tests(void)
printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
return_value8_1, return_value8_2, return_value16_1, return_value16_2,
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
} else if (return_value8_1 >= 0) {
return_value8_1 *= 2;
/* Transform back the results. */
@@ -916,7 +923,7 @@ static int regression_tests(void)
printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
}
} else {
@@ -926,14 +933,14 @@ static int regression_tests(void)
if (return_value8_1 != return_value8_2) {
printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
return_value8_1, return_value8_2, total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
} else if (return_value8_1 >= 0) {
return_value8_1 *= 2;
for (i = 0; i < return_value8_1; ++i)
if (ovector8_1[i] != ovector8_2[i]) {
printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
}
#endif
@@ -942,14 +949,14 @@ static int regression_tests(void)
if (return_value16_1 != return_value16_2) {
printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
return_value16_1, return_value16_2, total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
} else if (return_value16_1 >= 0) {
return_value16_1 *= 2;
for (i = 0; i < return_value16_1; ++i)
if (ovector16_1[i] != ovector16_2[i]) {
printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
}
#endif
@@ -959,19 +966,19 @@ static int regression_tests(void)
#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
}
- if (is_succesful) {
+ if (is_successful) {
#ifdef SUPPORT_PCRE8
if (!(current->start_offset & F_NO8)) {
if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
}
#endif
@@ -980,20 +987,20 @@ static int regression_tests(void)
if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
total, current->pattern, current->input);
- is_succesful = 0;
+ is_successful = 0;
}
}
#endif
}
- if (is_succesful)
- succesful++;
+ if (is_successful)
+ successful++;
#ifdef SUPPORT_PCRE8
if (re8) {
@@ -1014,11 +1021,11 @@ static int regression_tests(void)
current++;
}
- if (total == succesful) {
+ if (total == successful) {
printf("\nAll JIT regression tests are successfully passed.\n");
return 0;
} else {
- printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
+ printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
return 1;
}
}
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ef9b82c..62d6f3e 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -339,12 +339,12 @@ Memory allocation (code space): 10
------------------------------------------------------------------
/[\x{100}]/8BM
-Memory allocation (code space): 15
+Memory allocation (code space): 10
------------------------------------------------------------------
- 0 11 Bra
- 3 [\x{100}]
- 11 11 Ket
- 14 End
+ 0 6 Bra
+ 3 \x{100}
+ 6 6 Ket
+ 9 End
------------------------------------------------------------------
/\x80/8BM
@@ -405,12 +405,12 @@ First char = 230
Need char = 158
/[\x{100}]/8BM
-Memory allocation (code space): 15
+Memory allocation (code space): 10
------------------------------------------------------------------
- 0 11 Bra
- 3 [\x{100}]
- 11 11 Ket
- 14 End
+ 0 6 Bra
+ 3 \x{100}
+ 6 6 Ket
+ 9 End
------------------------------------------------------------------
/[Z\x{100}]/8BM
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 417225a..b35e6a7 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -7548,7 +7548,7 @@ Matched, but too many substrings
/[^a]+a/BZi
------------------------------------------------------------------
Bra
- /i [^A]++
+ /i [^a]++
/i a
Ket
End
@@ -7557,7 +7557,7 @@ Matched, but too many substrings
/[^a]+A/BZi
------------------------------------------------------------------
Bra
- /i [^A]++
+ /i [^a]++
/i A
Ket
End
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b63934d..8de96cf 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -79,14 +79,14 @@ Need char = 191
/[\x{100}]/8DZ
------------------------------------------------------------------
Bra
- [\x{100}]
+ \x{100}
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-No first char
-No need char
+First char = 196
+Need char = 128
/\x{ffffffff}/8
Failed: character value in \x{...} sequence is too large at offset 11
@@ -624,14 +624,14 @@ No need char
/[\x{100}]/8DZ
------------------------------------------------------------------
Bra
- [\x{100}]
+ \x{100}
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Options: utf8
-No first char
-No need char
+First char = 196
+Need char = 128
\x{100}
0: \x{100}
Z\x{100}