Minor issues after merge.

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@802 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-13 09:52:20 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-13 09:52:20 +0000
commit: d6d90f76799f04fd02200858f0a81b26333f7f2f (patch)
tree: 0ecbaf129f4ddb8e1450bb41406033322853d3cd
parent: df3f8297fbe5e0a4c395e9021ecf176fdd6dab52 (diff)
download: pcre-d6d90f76799f04fd02200858f0a81b26333f7f2f.tar.gz
4 files changed, 33 insertions, 15 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 8dee2fb..fcc734f 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3762,7 +3762,7 @@ for (;; ptr++)
 
     /* For optimization purposes, we track some properties of the class.
     class_has_8bitchar will be non-zero, if the class contains at least one
-    < 256 character. class_single_char will be 1, if the class only contains
+    < 256 character. class_single_char will be 1 if the class contains only
     a single character. */
 
     class_has_8bitchar = 0;
@@ -3933,7 +3933,7 @@ for (;; ptr++)
       of the specials, which just set a flag. The sequence \b is a special
       case. Inside a class (and only there) it is treated as backspace. We
       assume that other escapes have more than one character in them, so
-      speculatively set both class_has_8bitchar class_single_char bigger
+      speculatively set both class_has_8bitchar and class_single_char bigger
       than one. Unrecognized escapes fall through and are either treated
       as literal characters (by default), or are faulted if
       PCRE_EXTRA is set. */
@@ -4420,6 +4420,7 @@ for (;; ptr++)
       class_lastchar = c;
 
       /* Handle a character that cannot go in the bit map */
+       
 #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
       if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
 #elif defined SUPPORT_UTF
@@ -4427,15 +4428,15 @@ for (;; ptr++)
 #elif !(defined COMPILE_PCRE8)
       if (c > 255)
 #endif
+
 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
         {
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
 #ifdef SUPPORT_UTF
 #ifndef COMPILE_PCRE8
-        /* In non 8 bit mode, we can get here even
-        if we are not in UTF mode. */
-        if (!utf)
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
+        if (!utf) 
           *class_uchardata++ = c;
         else
 #endif
@@ -4448,8 +4449,7 @@ for (;; ptr++)
 #ifdef COMPILE_PCRE8
         if ((options & PCRE_CASELESS) != 0)
 #else
-        /* In non 8 bit mode, we can get here even
-        if we are not in UTF mode. */
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
         if (utf && (options & PCRE_CASELESS) != 0)
 #endif
           {
@@ -4465,7 +4465,7 @@ for (;; ptr++)
             However, that uses less memory, and so if this happens to be at the
             end of the regex, there will not be enough memory in the real
             compile for this temporary storage. */
-              
+            
             if (lengthptr != NULL)
               {
               *lengthptr += class_uchardata - class_uchardata_base;
@@ -4478,6 +4478,7 @@ for (;; ptr++)
         }
       else
 #endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
+
       /* Handle a single-byte character */
         {
         class_has_8bitchar = 1;
@@ -4488,7 +4489,6 @@ for (;; ptr++)
           classbits[c/8] |= (1 << (c&7));
           }
         }
-
       }
 
     /* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -4508,11 +4508,9 @@ for (;; ptr++)
       goto FAILED;
       }
 
-    /* COMMENT NEEDS FIXING - no longer true.
-    If class_charcount is 1, we saw precisely one character whose value is
-    less than 256. As long as there were no characters >= 128 and there was no
-    use of \p or \P, in other words, no use of any XCLASS features, we can
-    optimize.
+    /* If class_single_char is 1, we saw precisely one character. As long as
+    there were no negated characters >= 128 and there was no use of \p or \P,
+    in other words, no use of any XCLASS features, we can optimize.
 
     In UTF-8 mode, we can optimize the negative case only if there were no
     characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
diff --git a/pcre_study.c b/pcre_study.c
index a2b1c06..e04eea7 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -1433,7 +1433,7 @@ if (bits_set || min > 0
     study->flags |= PCRE_STUDY_MAPPED;
     memcpy(study->start_bits, start_bits, sizeof(start_bits));
     }
-  else memset(study->start_bits, 0, 32 * sizeof(pcre_uchar));
+  else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
 
 #ifdef PCRE_DEBUG
   if (bits_set)
diff --git a/testdata/testinput13 b/testdata/testinput13
index e91c24e..eba1728 100644
--- a/testdata/testinput13
+++ b/testdata/testinput13
@@ -580,4 +580,8 @@ of case for anything other than the ASCII letters. --/
 
 /(?<=ab\Cde)X/8
 
+/[ⱥ]/8iBZ
+
+/[^ⱥ]/8iBZ
+
 /-- End of testinput13 --/
diff --git a/testdata/testoutput13 b/testdata/testoutput13
index 13cc0d0..3151699 100644
--- a/testdata/testoutput13
+++ b/testdata/testoutput13
@@ -1289,4 +1289,20 @@ No match
 /(?<=ab\Cde)X/8
 Failed: \C not allowed in lookbehind assertion at offset 10
 
+/[ⱥ]/8iBZ
+------------------------------------------------------------------
+        Bra
+     /i \x{2c65}
+        Ket
+        End
+------------------------------------------------------------------
+
+/[^ⱥ]/8iBZ
+------------------------------------------------------------------
+        Bra
+        [^\x{2c65}\x{23a}]
+        Ket
+        End
+------------------------------------------------------------------
+
 /-- End of testinput13 --/
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-13 09:52:20 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-13 09:52:20 +0000
commit	d6d90f76799f04fd02200858f0a81b26333f7f2f (patch)
tree	0ecbaf129f4ddb8e1450bb41406033322853d3cd
parent	df3f8297fbe5e0a4c395e9021ecf176fdd6dab52 (diff)
download	pcre-d6d90f76799f04fd02200858f0a81b26333f7f2f.tar.gz