Optimization fixes for character classes that contain only a single character

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@798 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-11 18:07:25 +0000
committer: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-11 18:07:25 +0000
commit: 02b9094df724302cd24f71f6a28ec3df318cec71 (patch)
tree: 550804928e374af17e464536f206369cbc7ec4d8
parent: d277aeef628cc77e6daafe2771970319476d2585 (diff)
download: pcre-02b9094df724302cd24f71f6a28ec3df318cec71.tar.gz
8 files changed, 160 insertions, 92 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 3fa7c67..2be0936 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3413,7 +3413,8 @@ for (;; ptr++)
   BOOL is_quantifier;
   BOOL is_recurse;
   BOOL reset_bracount;
-  int class_charcount;
+  int class_has_8bitchar;
+  int class_single_char;
   int class_lastchar;
   int newoptions;
   int recno;
@@ -3710,11 +3711,13 @@ for (;; ptr++)
 
     should_flip_negation = FALSE;
 
-    /* Keep a count of chars with values < 256 so that we can optimize the case
-    of just a single character (as long as it's < 256). However, For higher
-    valued UTF-8 characters, we don't yet do any optimization. */
+    /* For optimization purposes, we track some properties of the class.
+    class_has_8bitchar will be non-zero if the class contains at least one
+    character < 256. class_single_char will be 1 if the class contains only
+    a single character. */
 
-    class_charcount = 0;
+    class_has_8bitchar = 0;
+    class_single_char = 0;
     class_lastchar = -1;
 
     /* Initialize the 32-char bit map to all zeros. We build the map in a
@@ -3870,16 +3873,20 @@ for (;; ptr++)
           for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
 
         ptr = tempptr + 1;
-        class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
+        /* Every class contains at least one character < 256. */
+        class_has_8bitchar = 1;
+        /* Every class contains at least two characters. */
+        class_single_char = 2;
         continue;    /* End of POSIX syntax handling */
         }
 
       /* Backslash may introduce a single character, or it may introduce one
       of the specials, which just set a flag. The sequence \b is a special
       case. Inside a class (and only there) it is treated as backspace. We
-      assume that other escapes have more than one character in them, so set
-      class_charcount bigger than one. Unrecognized escapes fall through and
-      are either treated as literal characters (by default), or are faulted if
+      assume that other escapes have more than one character in them, so
+      speculatively increase both class_has_8bitchar and class_single_char.
+      Unrecognized escapes fall through and are either treated
+      as literal characters (by default), or are faulted if
       PCRE_EXTRA is set. */
 
       if (c == CHAR_BACKSLASH)
@@ -3902,7 +3909,10 @@ for (;; ptr++)
         if (c < 0)
           {
           register const pcre_uint8 *cbits = cd->cbits;
-          class_charcount += 2;     /* Greater than 1 is what matters */
+          /* Every class contains at least two < 256 characters. */
+          class_has_8bitchar++;
+          /* Every class contains at least two characters. */
+          class_single_char += 2;
 
           switch (-c)
             {
@@ -3915,7 +3925,7 @@ for (;; ptr++)
             case ESC_SU:
             nestptr = ptr;
             ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
-            class_charcount -= 2;                /* Undo! */
+            class_has_8bitchar--;                /* Undo! */
             continue;
 #endif
             case ESC_d:
@@ -4081,7 +4091,7 @@ for (;; ptr++)
                 XCL_PROP : XCL_NOTPROP;
               *class_uchardata++ = ptype;
               *class_uchardata++ = pdata;
-              class_charcount -= 2;   /* Not a < 256 character */
+              class_has_8bitchar--;                /* Undo! */
               continue;
               }
 #endif
@@ -4095,14 +4105,15 @@ for (;; ptr++)
               *errorcodeptr = ERR7;
               goto FAILED;
               }
-            class_charcount -= 2;  /* Undo the default count from above */
-            c = *ptr;              /* Get the final character and fall through */
+            class_has_8bitchar--;    /* Undo the speculative increase. */
+            class_single_char -= 2;  /* Undo the speculative increase. */
+            c = *ptr;                /* Get the final character and fall through */
             break;
             }
           }
 
         /* Fall through if we have a single character (c >= 0). This may be
-        greater than 256 mode. */
+        greater than 256. */
 
         }   /* End of backslash handling */
 
@@ -4195,6 +4206,10 @@ for (;; ptr++)
 
         if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
 
+        /* Since we found a character range, single character optimizations
+        cannot be done anymore. */
+        class_single_char = 2;
+
         /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
         matching, we have to use an XCLASS with extra data items. Caseless
         matching for characters > 127 is available only if UCP support is
@@ -4323,8 +4338,7 @@ for (;; ptr++)
         /* We use the bit map for 8 bit mode, or when the characters fall
         partially or entirely to [0-255] ([0-127] for UCP) ranges. */
 
-        class_charcount += d - c + 1;
-        class_lastchar = d;
+        class_has_8bitchar = 1;
 
         /* We can save a bit of time by skipping this in the pre-compile. */
 
@@ -4347,8 +4361,11 @@ for (;; ptr++)
 
       LONE_SINGLE_CHARACTER:
 
-      /* Handle a character that cannot go in the bit map */
+      /* Only the value of 1 matters for class_single_char. */
+      if (class_single_char < 2) class_single_char++;
+      class_lastchar = c;
 
+      /* Handle a character that cannot go in the bit map */
 #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
       if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
 #elif defined SUPPORT_UTF
@@ -4396,14 +4413,13 @@ for (;; ptr++)
 #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
       /* Handle a single-byte character */
         {
+        class_has_8bitchar = 1;
         classbits[c/8] |= (1 << (c&7));
         if ((options & PCRE_CASELESS) != 0)
           {
           c = cd->fcc[c];   /* flip case */
           classbits[c/8] |= (1 << (c&7));
           }
-        class_charcount++;
-        class_lastchar = c;
         }
 
       }
@@ -4443,15 +4459,15 @@ for (;; ptr++)
     of reqchar, save the previous value for reinstating. */
 
 #ifdef SUPPORT_UTF
-    if (class_charcount == 1 && !xclass &&
-      (!utf || !negate_class || class_lastchar < 128))
+    if (class_single_char == 1 && (!utf || !negate_class
+      || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
 #else
-    if (class_charcount == 1)
+    if (class_single_char == 1)
 #endif
       {
       zeroreqchar = reqchar;
 
-      /* The OP_NOT[I] opcodes work on one-byte characters only. */
+      /* The OP_NOT[I] opcodes work on single characters only. */
 
       if (negate_class)
         {
@@ -4466,7 +4482,7 @@ for (;; ptr++)
       then we can handle this with the normal one-character code. */
 
 #ifdef SUPPORT_UTF
-      if (utf && class_lastchar > 127)
+      if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR)
         mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
       else
 #endif
@@ -4510,7 +4526,7 @@ for (;; ptr++)
       /* If the map is required, move up the extra data to make room for it;
       otherwise just move the code pointer to the end of the extra data. */
 
-      if (class_charcount > 0)
+      if (class_has_8bitchar > 0)
         {
         *code++ |= XCL_MAP;
         memmove(code + (32 / sizeof(pcre_uchar)), code,
@@ -6686,7 +6702,7 @@ for (;; ptr++)
     handle it as a data character. */
 
 #ifdef SUPPORT_UTF
-    if (utf && c > 127)
+    if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
       mclength = PRIV(ord2utf)(c, mcbuffer);
     else
 #endif
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 58197ce..2b48eda 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -128,22 +128,27 @@ static const pcre_uint8 coptable[] = {
   1,                             /* noti                                   */
   /* Positive single-char repeats                                          */
   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
-  3, 3, 3,                       /* upto, minupto, exact                   */
-  1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
+  1+IMM2_SIZE,                   /* exact                                  */
+  1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
   1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
-  3, 3, 3,                       /* upto I, minupto I, exact I             */
-  1, 1, 1, 3,                    /* *+I, ++I, ?+I, upto+I                  */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
+  1+IMM2_SIZE,                   /* exact I                                */
+  1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
   /* Negative single-char repeats - only for chars < 256                   */
   1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
-  3, 3, 3,                       /* NOT upto, minupto, exact               */
-  1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
+  1+IMM2_SIZE,                   /* NOT exact                              */
+  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
   1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
-  3, 3, 3,                       /* NOT upto I, minupto I, exact I         */
-  1, 1, 1, 3,                    /* NOT *+I, ++I, ?+I, upto+I              */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
+  1+IMM2_SIZE,                   /* NOT exact I                            */
+  1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
   /* Positive type repeats                                                 */
   1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
-  3, 3, 3,                       /* Type upto, minupto, exact              */
-  1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
+  1+IMM2_SIZE,                   /* Type exact                             */
+  1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
   /* Character class & ref repeats                                         */
   0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
   0, 0,                          /* CRRANGE, CRMINRANGE                    */
@@ -296,7 +301,7 @@ Returns:       nothing
 */
 
 static void
-pchars(unsigned char *p, int length, FILE *f)
+pchars(const pcre_uchar *p, int length, FILE *f)
 {
 int c;
 while (length-- > 0)
@@ -582,7 +587,7 @@ for (;;)
 
 #ifdef PCRE_DEBUG
   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
-  pchars((pcre_uchar *)ptr, strlen((char *)ptr), stdout);
+  pchars(ptr, STRLEN_UC(ptr), stdout);
   printf("\"\n");
 
   printf("%.*sActive states: ", rlevel*2-2, SP);
diff --git a/pcre_exec.c b/pcre_exec.c
index 9aa07a7..5d85e4b 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -3085,7 +3085,10 @@ for (;;)
 
       if (fc < 128)
         {
-        if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+        if (md->lcc[fc]
+            != TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH);
+        ecode++;
+        eptr++;
         }
 
       /* Otherwise we must pick up the subject character */
@@ -3316,7 +3319,7 @@ for (;;)
     if (op >= OP_STARI)  /* Caseless */
       {
 #ifdef COMPILE_PCRE8
-      /* fc must be < 128 */
+      /* fc must be < 128 if UTF is enabled. */
       foc = md->fcc[fc];
 #else
 #ifdef SUPPORT_UTF
@@ -3459,11 +3462,25 @@ for (;;)
     GETCHARINCTEST(c, eptr);
     if (op == OP_NOTI)         /* The caseless case */
       {
-#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
-      if (c < 256)
-#endif
-        c = md->lcc[c];
-      if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
+      register int ch, och;
+      ch = *ecode++;
+#ifdef COMPILE_PCRE8
+      /* ch must be < 128 if UTF is enabled. */
+      och = md->fcc[ch];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+      if (utf && ch > 127)
+        och = UCD_OTHERCASE(ch);
+#else
+      if (utf && ch > 127)
+        och = ch;
+#endif /* SUPPORT_UCP */
+      else
+#endif /* SUPPORT_UTF */
+        och = TABLE_GET(ch, md->fcc, ch);
+#endif /* COMPILE_PCRE8 */
+      if (ch == c || och == c) MRRETURN(MATCH_NOMATCH);
       }
     else    /* Caseful */
       {
@@ -3562,7 +3579,22 @@ for (;;)
 
     if (op >= OP_NOTSTARI)     /* Caseless */
       {
-      fc = TABLE_GET(fc, md->lcc, fc);
+#ifdef COMPILE_PCRE8
+      /* fc must be < 128 if UTF is enabled. */
+      foc = md->fcc[fc];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+      if (utf && fc > 127)
+        foc = UCD_OTHERCASE(fc);
+#else
+      if (utf && fc > 127)
+        foc = fc;
+#endif /* SUPPORT_UCP */
+      else
+#endif /* SUPPORT_UTF */
+        foc = TABLE_GET(fc, md->fcc, fc);
+#endif /* COMPILE_PCRE8 */
 
 #ifdef SUPPORT_UTF
       if (utf)
@@ -3576,8 +3608,7 @@ for (;;)
             MRRETURN(MATCH_NOMATCH);
             }
           GETCHARINC(d, eptr);
-          if (d < 256) d = md->lcc[d];
-          if (fc == d) MRRETURN(MATCH_NOMATCH);
+          if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH);
           }
         }
       else
@@ -3591,7 +3622,8 @@ for (;;)
             SCHECK_PARTIAL();
             MRRETURN(MATCH_NOMATCH);
             }
-          if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+          if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         }
 
@@ -3614,8 +3646,7 @@ for (;;)
               MRRETURN(MATCH_NOMATCH);
               }
             GETCHARINC(d, eptr);
-            if (d < 256) d = md->lcc[d];
-            if (fc == d) MRRETURN(MATCH_NOMATCH);
+            if (fc == d || foc == d) MRRETURN(MATCH_NOMATCH);
             }
           }
         else
@@ -3632,7 +3663,8 @@ for (;;)
               SCHECK_PARTIAL();
               MRRETURN(MATCH_NOMATCH);
               }
-            if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
+            if (fc == *eptr || foc == *eptr) MRRETURN(MATCH_NOMATCH);
+            eptr++;
             }
           }
         /* Control never gets here */
@@ -3657,8 +3689,7 @@ for (;;)
               break;
               }
             GETCHARLEN(d, eptr, len);
-            if (d < 256) d = md->lcc[d];
-            if (fc == d) break;
+            if (fc == d || foc == d) break;
             eptr += len;
             }
         if (possessive) continue;
@@ -3681,7 +3712,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if (fc == md->lcc[*eptr]) break;
+            if (fc == *eptr || foc == *eptr) break;
             eptr++;
             }
           if (possessive) continue;
diff --git a/pcre_internal.h b/pcre_internal.h
index 9a20e73..fa0fb8b 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -532,6 +532,7 @@ UTF support is omitted, we don't even define them. */
 
 #ifndef SUPPORT_UTF
 
+/* #define MAX_VALUE_FOR_SINGLE_CHAR */
 /* #define HAS_EXTRALEN(c) */
 /* #define GET_EXTRALEN(c) */
 /* #define NOT_FIRSTCHAR(c) */
@@ -554,6 +555,10 @@ from the tables whose names start with PRIV(utf8_table). They were rewritten by
 a user so as not to use loops, because in some environments this gives a
 significant performance advantage, and it seems never to do any harm. */
 
+/* Tells the biggest code point which can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 127
+
 /* Tests whether the code point needs extra characters to decode. */
 
 #define HAS_EXTRALEN(c) ((c) >= 0xc0)
@@ -721,6 +726,10 @@ because almost all calls are already within a block of UTF-8 only code. */
 
 #ifdef COMPILE_PCRE16
 
+/* Tells the biggest code point which can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 65535
+
 /* Tests whether the code point needs extra characters to decode. */
 
 #define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index d2c2027..a1fd47b 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -290,6 +290,10 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
 	{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
 	{ CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
+	{ CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
+	{ CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
+	{ MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
+	{ MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
 
 	/* Basic character sets. */
 	{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
@@ -626,6 +630,9 @@ static struct regression_test_case regression_test_cases[] = {
 	{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
 	{ CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
 	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
+	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
+	{ MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
+	{ MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
 
 	/* Deep recursion. */
 	{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
@@ -736,9 +743,9 @@ static int regression_tests(void)
 {
 	struct regression_test_case *current = regression_test_cases;
 	const char *error;
-	int i, err_offs, is_succesful;
+	int i, err_offs, is_successful;
 	int total = 0;
-	int succesful = 0;
+	int successful = 0;
 	int counter = 0;
 #ifdef SUPPORT_PCRE8
 	pcre *re8;
@@ -889,7 +896,7 @@ static int regression_tests(void)
 		/* If F_DIFF is set, just run the test, but do not compare the results.
 		Segfaults can still be captured. */
 
-		is_succesful = 1;
+		is_successful = 1;
 		if (!(current->start_offset & F_DIFF)) {
 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
 			if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
@@ -898,7 +905,7 @@ static int regression_tests(void)
 					printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
 						return_value8_1, return_value8_2, return_value16_1, return_value16_2,
 						total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				} else if (return_value8_1 >= 0) {
 					return_value8_1 *= 2;
 					/* Transform back the results. */
@@ -916,7 +923,7 @@ static int regression_tests(void)
 							printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
 								i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
 								total, current->pattern, current->input);
-							is_succesful = 0;
+							is_successful = 0;
 						}
 				}
 			} else {
@@ -926,14 +933,14 @@ static int regression_tests(void)
 				if (return_value8_1 != return_value8_2) {
 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
 						return_value8_1, return_value8_2, total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				} else if (return_value8_1 >= 0) {
 					return_value8_1 *= 2;
 					for (i = 0; i < return_value8_1; ++i)
 						if (ovector8_1[i] != ovector8_2[i]) {
 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
 								i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
-							is_succesful = 0;
+							is_successful = 0;
 						}
 				}
 #endif
@@ -942,14 +949,14 @@ static int regression_tests(void)
 				if (return_value16_1 != return_value16_2) {
 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
 						return_value16_1, return_value16_2, total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				} else if (return_value16_1 >= 0) {
 					return_value16_1 *= 2;
 					for (i = 0; i < return_value16_1; ++i)
 						if (ovector16_1[i] != ovector16_2[i]) {
 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
 								i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
-							is_succesful = 0;
+							is_successful = 0;
 						}
 				}
 #endif
@@ -959,19 +966,19 @@ static int regression_tests(void)
 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
 		}
 
-		if (is_succesful) {
+		if (is_successful) {
 #ifdef SUPPORT_PCRE8
 			if (!(current->start_offset & F_NO8)) {
 				if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
 						total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				}
 
 				if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
 						total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				}
 			}
 #endif
@@ -980,20 +987,20 @@ static int regression_tests(void)
 				if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
 						total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				}
 
 				if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
 						total, current->pattern, current->input);
-					is_succesful = 0;
+					is_successful = 0;
 				}
 			}
 #endif
 		}
 
-		if (is_succesful)
-			succesful++;
+		if (is_successful)
+			successful++;
 
 #ifdef SUPPORT_PCRE8
 		if (re8) {
@@ -1014,11 +1021,11 @@ static int regression_tests(void)
 		current++;
 	}
 
-	if (total == succesful) {
+	if (total == successful) {
 		printf("\nAll JIT regression tests are successfully passed.\n");
 		return 0;
 	} else {
-		printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
+		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
 		return 1;
 	}
 }
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index ef9b82c..62d6f3e 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -339,12 +339,12 @@ Memory allocation (code space): 10
 ------------------------------------------------------------------
 
 /[\x{100}]/8BM
-Memory allocation (code space): 15
+Memory allocation (code space): 10
 ------------------------------------------------------------------
-  0  11 Bra
-  3     [\x{100}]
- 11  11 Ket
- 14     End
+  0   6 Bra
+  3     \x{100}
+  6   6 Ket
+  9     End
 ------------------------------------------------------------------
 
 /\x80/8BM
@@ -405,12 +405,12 @@ First char = 230
 Need char = 158
 
 /[\x{100}]/8BM
-Memory allocation (code space): 15
+Memory allocation (code space): 10
 ------------------------------------------------------------------
-  0  11 Bra
-  3     [\x{100}]
- 11  11 Ket
- 14     End
+  0   6 Bra
+  3     \x{100}
+  6   6 Ket
+  9     End
 ------------------------------------------------------------------
 
 /[Z\x{100}]/8BM
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 417225a..b35e6a7 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -7548,7 +7548,7 @@ Matched, but too many substrings
 /[^a]+a/BZi
 ------------------------------------------------------------------
         Bra
-     /i [^A]++
+     /i [^a]++
      /i a
         Ket
         End
@@ -7557,7 +7557,7 @@ Matched, but too many substrings
 /[^a]+A/BZi
 ------------------------------------------------------------------
         Bra
-     /i [^A]++
+     /i [^a]++
      /i A
         Ket
         End
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b63934d..8de96cf 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -79,14 +79,14 @@ Need char = 191
 /[\x{100}]/8DZ
 ------------------------------------------------------------------
         Bra
-        [\x{100}]
+        \x{100}
         Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
 Options: utf8
-No first char
-No need char
+First char = 196
+Need char = 128
 
 /\x{ffffffff}/8
 Failed: character value in \x{...} sequence is too large at offset 11
@@ -624,14 +624,14 @@ No need char
 /[\x{100}]/8DZ
 ------------------------------------------------------------------
         Bra
-        [\x{100}]
+        \x{100}
         Ket
         End
 ------------------------------------------------------------------
 Capturing subpattern count = 0
 Options: utf8
-No first char
-No need char
+First char = 196
+Need char = 128
     \x{100}
  0: \x{100}
     Z\x{100}
author	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-11 18:07:25 +0000
committer	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-11 18:07:25 +0000
commit	02b9094df724302cd24f71f6a28ec3df318cec71 (patch)
tree	550804928e374af17e464536f206369cbc7ec4d8
parent	d277aeef628cc77e6daafe2771970319476d2585 (diff)
download	pcre-02b9094df724302cd24f71f6a28ec3df318cec71.tar.gz