diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-11 18:07:25 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-11 18:07:25 +0000 |
commit | 02b9094df724302cd24f71f6a28ec3df318cec71 (patch) | |
tree | 550804928e374af17e464536f206369cbc7ec4d8 | |
parent | d277aeef628cc77e6daafe2771970319476d2585 (diff) | |
download | pcre-02b9094df724302cd24f71f6a28ec3df318cec71.tar.gz |
Optimization fixes for ranges containing only a single character
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@798 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | pcre_compile.c | 70 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 29 | ||||
-rw-r--r-- | pcre_exec.c | 65 | ||||
-rw-r--r-- | pcre_internal.h | 9 | ||||
-rw-r--r-- | pcre_jit_test.c | 43 | ||||
-rw-r--r-- | testdata/testoutput10 | 20 | ||||
-rw-r--r-- | testdata/testoutput2 | 4 | ||||
-rw-r--r-- | testdata/testoutput5 | 12 |
8 files changed, 160 insertions, 92 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index 3fa7c67..2be0936 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3413,7 +3413,8 @@ for (;; ptr++) BOOL is_quantifier; BOOL is_recurse; BOOL reset_bracount; - int class_charcount; + int class_has_8bitchar; + int class_single_char; int class_lastchar; int newoptions; int recno; @@ -3710,11 +3711,13 @@ for (;; ptr++) should_flip_negation = FALSE; - /* Keep a count of chars with values < 256 so that we can optimize the case - of just a single character (as long as it's < 256). However, For higher - valued UTF-8 characters, we don't yet do any optimization. */ + /* For optimization purposes, we track some properties of the class. + class_has_8bitchar will be non-zero, if the class contains at least one + < 256 character. class_single_char will be 1, if the class only contains + a single character. */ - class_charcount = 0; + class_has_8bitchar = 0; + class_single_char = 0; class_lastchar = -1; /* Initialize the 32-char bit map to all zeros. We build the map in a @@ -3870,16 +3873,20 @@ for (;; ptr++) for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; ptr = tempptr + 1; - class_charcount = 10; /* Set > 1; assumes more than 1 per class */ + /* Every class contains at least one < 256 characters. */ + class_has_8bitchar = 1; + /* Every class contains at least two characters. */ + class_single_char = 2; continue; /* End of POSIX syntax handling */ } /* Backslash may introduce a single character, or it may introduce one of the specials, which just set a flag. The sequence \b is a special case. Inside a class (and only there) it is treated as backspace. We - assume that other escapes have more than one character in them, so set - class_charcount bigger than one. Unrecognized escapes fall through and - are either treated as literal characters (by default), or are faulted if + assume that other escapes have more than one character in them, so + speculatively set both class_has_8bitchar class_single_char bigger + than one. 
Unrecognized escapes fall through and are either treated + as literal characters (by default), or are faulted if PCRE_EXTRA is set. */ if (c == CHAR_BACKSLASH) @@ -3902,7 +3909,10 @@ for (;; ptr++) if (c < 0) { register const pcre_uint8 *cbits = cd->cbits; - class_charcount += 2; /* Greater than 1 is what matters */ + /* Every class contains at least two < 256 characters. */ + class_has_8bitchar++; + /* Every class contains at least two characters. */ + class_single_char += 2; switch (-c) { @@ -3915,7 +3925,7 @@ for (;; ptr++) case ESC_SU: nestptr = ptr; ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ - class_charcount -= 2; /* Undo! */ + class_has_8bitchar--; /* Undo! */ continue; #endif case ESC_d: @@ -4081,7 +4091,7 @@ for (;; ptr++) XCL_PROP : XCL_NOTPROP; *class_uchardata++ = ptype; *class_uchardata++ = pdata; - class_charcount -= 2; /* Not a < 256 character */ + class_has_8bitchar--; /* Undo! */ continue; } #endif @@ -4095,14 +4105,15 @@ for (;; ptr++) *errorcodeptr = ERR7; goto FAILED; } - class_charcount -= 2; /* Undo the default count from above */ - c = *ptr; /* Get the final character and fall through */ + class_has_8bitchar--; /* Undo the speculative increase. */ + class_single_char -= 2; /* Undo the speculative increase. */ + c = *ptr; /* Get the final character and fall through */ break; } } /* Fall through if we have a single character (c >= 0). This may be - greater than 256 mode. */ + greater than 256. */ } /* End of backslash handling */ @@ -4195,6 +4206,10 @@ for (;; ptr++) if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; + /* Since we found a character range, single character optimizations + cannot be done anymore. */ + class_single_char = 2; + /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless matching, we have to use an XCLASS with extra data items. 
Caseless matching for characters > 127 is available only if UCP support is @@ -4323,8 +4338,7 @@ for (;; ptr++) /* We use the bit map for 8 bit mode, or when the characters fall partially or entirely to [0-255] ([0-127] for UCP) ranges. */ - class_charcount += d - c + 1; - class_lastchar = d; + class_has_8bitchar = 1; /* We can save a bit of time by skipping this in the pre-compile. */ @@ -4347,8 +4361,11 @@ for (;; ptr++) LONE_SINGLE_CHARACTER: - /* Handle a character that cannot go in the bit map */ + /* Only the value of 1 matters for class_single_char. */ + if (class_single_char < 2) class_single_char++; + class_lastchar = c; + /* Handle a character that cannot go in the bit map */ #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127))) #elif defined SUPPORT_UTF @@ -4396,14 +4413,13 @@ for (;; ptr++) #endif /* SUPPORT_UTF || COMPILE_PCRE16 */ /* Handle a single-byte character */ { + class_has_8bitchar = 1; classbits[c/8] |= (1 << (c&7)); if ((options & PCRE_CASELESS) != 0) { c = cd->fcc[c]; /* flip case */ classbits[c/8] |= (1 << (c&7)); } - class_charcount++; - class_lastchar = c; } } @@ -4443,15 +4459,15 @@ for (;; ptr++) of reqchar, save the previous value for reinstating. */ #ifdef SUPPORT_UTF - if (class_charcount == 1 && !xclass && - (!utf || !negate_class || class_lastchar < 128)) + if (class_single_char == 1 && (!utf || !negate_class + || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) #else - if (class_charcount == 1) + if (class_single_char == 1) #endif { zeroreqchar = reqchar; - /* The OP_NOT[I] opcodes work on one-byte characters only. */ + /* The OP_NOT[I] opcodes work on single characters only. */ if (negate_class) { @@ -4466,7 +4482,7 @@ for (;; ptr++) then we can handle this with the normal one-character code. 
*/ #ifdef SUPPORT_UTF - if (utf && class_lastchar > 127) + if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR) mclength = PRIV(ord2utf)(class_lastchar, mcbuffer); else #endif @@ -4510,7 +4526,7 @@ for (;; ptr++) /* If the map is required, move up the extra data to make room for it; otherwise just move the code pointer to the end of the extra data. */ - if (class_charcount > 0) + if (class_has_8bitchar > 0) { *code++ |= XCL_MAP; memmove(code + (32 / sizeof(pcre_uchar)), code, @@ -6686,7 +6702,7 @@ for (;; ptr++) handle it as a data character. */ #ifdef SUPPORT_UTF - if (utf && c > 127) + if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) mclength = PRIV(ord2utf)(c, mcbuffer); else #endif diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 58197ce..2b48eda 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -128,22 +128,27 @@ static const pcre_uint8 coptable[] = { 1, /* noti */ /* Positive single-char repeats */ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ - 3, 3, 3, /* upto, minupto, exact */ - 1, 1, 1, 3, /* *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ + 1+IMM2_SIZE, /* exact */ + 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ - 3, 3, 3, /* upto I, minupto I, exact I */ - 1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ + 1+IMM2_SIZE, /* exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ /* Negative single-char repeats - only for chars < 256 */ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? 
*/ - 3, 3, 3, /* NOT upto, minupto, exact */ - 1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ + 1+IMM2_SIZE, /* NOT exact */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ - 3, 3, 3, /* NOT upto I, minupto I, exact I */ - 1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ + 1+IMM2_SIZE, /* NOT exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ /* Positive type repeats */ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ - 3, 3, 3, /* Type upto, minupto, exact */ - 1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ + 1+IMM2_SIZE, /* Type exact */ + 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ /* Character class & ref repeats */ 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ 0, 0, /* CRRANGE, CRMINRANGE */ @@ -296,7 +301,7 @@ Returns: nothing */ static void -pchars(unsigned char *p, int length, FILE *f) +pchars(const pcre_uchar *p, int length, FILE *f) { int c; while (length-- > 0) @@ -582,7 +587,7 @@ for (;;) #ifdef PCRE_DEBUG printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); - pchars((pcre_uchar *)ptr, strlen((char *)ptr), stdout); + pchars(ptr, STRLEN_UC(ptr), stdout); printf("\"\n"); printf("%.*sActive states: ", rlevel*2-2, SP); diff --git a/pcre_exec.c b/pcre_exec.c index 9aa07a7..5d85e4b 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -3085,7 +3085,10 @@ for (;;) if (fc < 128) { - if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); + if (md->lcc[fc] + != TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH); + ecode++; + eptr++; } /* Otherwise we must pick up the subject character */ @@ -3316,7 +3319,7 @@ for (;;) if (op >= OP_STARI) /* Caseless */ { #ifdef COMPILE_PCRE8 - /* fc must be < 128 */ + /* fc must be < 128 if UTF is enabled. 
*/ foc = md->fcc[fc]; #else #ifdef SUPPORT_UTF @@ -3459,11 +3462,25 @@ for (;;) GETCHARINCTEST(c, eptr); if (op == OP_NOTI) /* The caseless case */ { -#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) - if (c < 256) -#endif - c = md->lcc[c]; - if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH); + register int ch, och; + ch = *ecode++; +#ifdef COMPILE_PCRE8 + /* ch must be < 128 if UTF is enabled. */ + och = md->fcc[ch]; +#else +#ifdef SUPPORT_UTF +#ifdef SUPPORT_UCP + if (utf && ch > 127) + och = UCD_OTHERCASE(ch); +#else + if (utf && ch > 127) + och = ch; +#endif /* SUPPORT_UCP */ + else +#endif /* SUPPORT_UTF */ + och = TABLE_GET(ch, md->fcc, ch); +#endif /* COMPILE_PCRE8 */ + if (ch == c || och == c) MRRETURN(MATCH_NOMATCH); } else /* Caseful */ { @@ -3562,7 +3579,22 @@ for (;;) if (op >= OP_NOTSTARI) /* Caseless */ { - fc = TABLE_GET(fc, md->lcc, fc); +#ifdef COMPILE_PCRE8 + /* fc must be < 128 if UTF is enabled. */ + foc = md->fcc[fc]; +#else +#ifdef SUPPORT_UTF +#ifdef SUPPORT_UCP + if (utf && fc > 127) + foc = UCD_OTHERCASE(fc); +#else + if (utf && fc > 127) + foc = fc; +#endif /* SUPPORT_UCP */ + else +#endif /* SUPPORT_UTF */ + foc = TABLE_GET(fc, md->fcc, fc); +#endif /* COMPILE_PCRE8 */ #ifdef SUPPORT_UTF if (utf) @@ -3576,8 +3608,7 @@ for (;;) MRRETURN(MATCH_NOMATCH); } GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (fc == d) MRRETURN(MATCH_NOMATCH); + if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH); } } else @@ -3591,7 +3622,8 @@ for (;;) SCHECK_PARTIAL(); MRRETURN(MATCH_NOMATCH); } - if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); + if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH); + eptr++; } } @@ -3614,8 +3646,7 @@ for (;;) MRRETURN(MATCH_NOMATCH); } GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (fc == d) MRRETURN(MATCH_NOMATCH); + if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH); } } else @@ -3632,7 +3663,8 @@ for (;;) SCHECK_PARTIAL(); MRRETURN(MATCH_NOMATCH); } - if (fc == md->lcc[*eptr++]) 
MRRETURN(MATCH_NOMATCH); + if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH); + eptr++; } } /* Control never gets here */ @@ -3657,8 +3689,7 @@ for (;;) break; } GETCHARLEN(d, eptr, len); - if (d < 256) d = md->lcc[d]; - if (fc == d) break; + if (fc == d || foc == d) break; eptr += len; } if (possessive) continue; @@ -3681,7 +3712,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if (fc == md->lcc[*eptr]) break; + if (fc == *eptr || foc == *eptr) break; eptr++; } if (possessive) continue; diff --git a/pcre_internal.h b/pcre_internal.h index 9a20e73..fa0fb8b 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -532,6 +532,7 @@ UTF support is omitted, we don't even define them. */ #ifndef SUPPORT_UTF +/* #define MAX_VALUE_FOR_SINGLE_CHAR */ /* #define HAS_EXTRALEN(c) */ /* #define GET_EXTRALEN(c) */ /* #define NOT_FIRSTCHAR(c) */ @@ -554,6 +555,10 @@ from the tables whose names start with PRIV(utf8_table). They were rewritten by a user so as not to use loops, because in some environments this gives a significant performance advantage, and it seems never to do any harm. */ +/* Tells the biggest code point which can be encoded as a single character. */ + +#define MAX_VALUE_FOR_SINGLE_CHAR 127 + /* Tests whether the code point needs extra characters to decode. */ #define HAS_EXTRALEN(c) ((c) >= 0xc0) @@ -721,6 +726,10 @@ because almost all calls are already within a block of UTF-8 only code. */ #ifdef COMPILE_PCRE16 +/* Tells the biggest code point which can be encoded as a single character. */ + +#define MAX_VALUE_FOR_SINGLE_CHAR 65535 + /* Tests whether the code point needs extra characters to decode. 
*/ #define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800) diff --git a/pcre_jit_test.c b/pcre_jit_test.c index d2c2027..a1fd47b 100644 --- a/pcre_jit_test.c +++ b/pcre_jit_test.c @@ -290,6 +290,10 @@ static struct regression_test_case regression_test_cases[] = { { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" }, { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, + { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" }, + { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, + { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, + { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, /* Basic character sets. */ { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, @@ -626,6 +630,9 @@ static struct regression_test_case regression_test_cases[] = { { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" }, { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" }, { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" }, + { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, + { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, + { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, /* Deep recursion. 
*/ { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, @@ -736,9 +743,9 @@ static int regression_tests(void) { struct regression_test_case *current = regression_test_cases; const char *error; - int i, err_offs, is_succesful; + int i, err_offs, is_successful; int total = 0; - int succesful = 0; + int successful = 0; int counter = 0; #ifdef SUPPORT_PCRE8 pcre *re8; @@ -889,7 +896,7 @@ static int regression_tests(void) /* If F_DIFF is set, just run the test, but do not compare the results. Segfaults can still be captured. */ - is_succesful = 1; + is_successful = 1; if (!(current->start_offset & F_DIFF)) { #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) { @@ -898,7 +905,7 @@ static int regression_tests(void) printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, return_value16_1, return_value16_2, total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; /* Transform back the results. 
*/ @@ -916,7 +923,7 @@ static int regression_tests(void) printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n", i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } } } else { @@ -926,14 +933,14 @@ static int regression_tests(void) if (return_value8_1 != return_value8_2) { printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value8_1, return_value8_2, total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } else if (return_value8_1 >= 0) { return_value8_1 *= 2; for (i = 0; i < return_value8_1; ++i) if (ovector8_1[i] != ovector8_2[i]) { printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } } #endif @@ -942,14 +949,14 @@ static int regression_tests(void) if (return_value16_1 != return_value16_2) { printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", return_value16_1, return_value16_2, total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } else if (return_value16_1 >= 0) { return_value16_1 *= 2; for (i = 0; i < return_value16_1; ++i) if (ovector16_1[i] != ovector16_2[i]) { printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } } #endif @@ -959,19 +966,19 @@ static int regression_tests(void) #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ } - if (is_succesful) { + if (is_successful) { #ifdef SUPPORT_PCRE8 if (!(current->start_offset & F_NO8)) { if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } if (return_value8_1 >= 0 && 
(current->start_offset & F_NOMATCH)) { printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } } #endif @@ -980,20 +987,20 @@ static int regression_tests(void) if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) { printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) { printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", total, current->pattern, current->input); - is_succesful = 0; + is_successful = 0; } } #endif } - if (is_succesful) - succesful++; + if (is_successful) + successful++; #ifdef SUPPORT_PCRE8 if (re8) { @@ -1014,11 +1021,11 @@ static int regression_tests(void) current++; } - if (total == succesful) { + if (total == successful) { printf("\nAll JIT regression tests are successfully passed.\n"); return 0; } else { - printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total); + printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); return 1; } } diff --git a/testdata/testoutput10 b/testdata/testoutput10 index ef9b82c..62d6f3e 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -339,12 +339,12 @@ Memory allocation (code space): 10 ------------------------------------------------------------------ /[\x{100}]/8BM -Memory allocation (code space): 15 +Memory allocation (code space): 10 ------------------------------------------------------------------ - 0 11 Bra - 3 [\x{100}] - 11 11 Ket - 14 End + 0 6 Bra + 3 \x{100} + 6 6 Ket + 9 End ------------------------------------------------------------------ /\x80/8BM @@ -405,12 +405,12 @@ First char = 230 Need char = 158 /[\x{100}]/8BM -Memory allocation (code space): 15 +Memory allocation (code space): 10 ------------------------------------------------------------------ - 0 11 Bra - 3 
[\x{100}] - 11 11 Ket - 14 End + 0 6 Bra + 3 \x{100} + 6 6 Ket + 9 End ------------------------------------------------------------------ /[Z\x{100}]/8BM diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 417225a..b35e6a7 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -7548,7 +7548,7 @@ Matched, but too many substrings /[^a]+a/BZi ------------------------------------------------------------------ Bra - /i [^A]++ + /i [^a]++ /i a Ket End @@ -7557,7 +7557,7 @@ Matched, but too many substrings /[^a]+A/BZi ------------------------------------------------------------------ Bra - /i [^A]++ + /i [^a]++ /i A Ket End diff --git a/testdata/testoutput5 b/testdata/testoutput5 index b63934d..8de96cf 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -79,14 +79,14 @@ Need char = 191 /[\x{100}]/8DZ ------------------------------------------------------------------ Bra - [\x{100}] + \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -No first char -No need char +First char = 196 +Need char = 128 /\x{ffffffff}/8 Failed: character value in \x{...} sequence is too large at offset 11 @@ -624,14 +624,14 @@ No need char /[\x{100}]/8DZ ------------------------------------------------------------------ Bra - [\x{100}] + \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf8 -No first char -No need char +First char = 196 +Need char = 128 \x{100} 0: \x{100} Z\x{100} |