Updating pcre_jit_test. Most of the JIT tests are working now in 16 bit mode.

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@786 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-06 11:33:41 +0000
committer: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-06 11:33:41 +0000
commit: b4a0233a732c67c98886725229df86fc150b0e82 (patch)
tree: e0a12eaa6f6f8aedd3f5b76969cb0a12a78f1341
parent: a9839b968cee5828bf35dbcb05a31859a49ab7a2 (diff)
download: pcre-b4a0233a732c67c98886725229df86fc150b0e82.tar.gz
10 files changed, 471 insertions, 167 deletions
diff --git a/Makefile.am b/Makefile.am
index 817b01a..b64ccd5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -211,6 +211,7 @@ lib_LTLIBRARIES += libpcre16.la
 libpcre16_la_SOURCES = \
   pcre16_chartables.c \
   pcre16_compile.c \
+  pcre16_config.c \
   pcre16_exec.c \
   pcre16_fullinfo.c \
   pcre16_info.c \
diff --git a/pcre.h.in b/pcre.h.in
index b9ec777..ea11766 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -234,6 +234,7 @@ compatible. */
 #define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 #define PCRE_CONFIG_BSR                     8
 #define PCRE_CONFIG_JIT                     9
+#define PCRE_CONFIG_UTF16                  10
 
 /* Request types for pcre_study(). Do not re-arrange, in order to remain
 compatible. */
@@ -353,6 +354,7 @@ PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
 PCRE_EXP_DECL pcre *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
                   int *, const unsigned char *);
 PCRE_EXP_DECL int  pcre_config(int, void *);
+PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                   int *, int, const char *, char *, int);
 PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
diff --git a/pcre16_config.c b/pcre16_config.c
new file mode 100644
index 0000000..826b100
--- /dev/null
+++ b/pcre16_config.c
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_config.c"
+
+/* End of pcre16_config.c */
diff --git a/pcre_compile.c b/pcre_compile.c
index da22f59..bdfac5b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3738,8 +3738,8 @@ for (;; ptr++)
       {
       const pcre_uchar *oldptr;
 
-#ifdef SUPPORT_UTF8
-      if (utf && c > 127)
+#ifdef SUPPORT_UTF
+      if (utf && HAS_EXTRALEN(c))
         {                           /* Braces are required because the */
         GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
         }
@@ -4317,11 +4317,10 @@ for (;; ptr++)
 
 #ifdef SUPPORT_UTF
       if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
-#endif
-#ifndef COMPILE_PCRE8
+#elif !(defined COMPILE_PCRE8)
       if (c > 255)
 #endif
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
         {
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
@@ -4345,8 +4344,7 @@ for (;; ptr++)
 
         }
       else
-#endif  /* SUPPORT_UTF8 */
-
+#endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
       /* Handle a single-byte character */
         {
         classbits[c/8] |= (1 << (c&7));
@@ -4358,6 +4356,7 @@ for (;; ptr++)
         class_charcount++;
         class_lastchar = c;
         }
+
       }
 
     /* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -5849,7 +5848,7 @@ for (;; ptr++)
 
             for (i = 0; i < cd->names_found; i++)
               {
-              int crc = memcmp(name, slot+2, namelen);
+              int crc = memcmp(name, slot+2, IN_UCHARS(namelen));
               if (crc == 0)
                 {
                 if (slot[2+namelen] == 0)
@@ -7440,7 +7439,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
   int newnl = 0;
   int newbsr = 0;
 
-  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
     { skipatstart += 6; options |= PCRE_UCP; continue; }
@@ -7805,8 +7804,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
             if (cd->fcc[re->first_char] != re->first_char)
               re->flags |= PCRE_FCH_CASELESS;
             }
-          else if ((options & PCRE_UCP) != 0
-              && UCD_OTHERCASE(re->first_char) != re->first_char)
+          else if (UCD_OTHERCASE(re->first_char) != re->first_char)
             re->flags |= PCRE_FCH_CASELESS;
           }
         else
@@ -7843,13 +7841,12 @@ if (reqchar >= 0 &&
     /* We ignore non-ASCII first chars in 8 bit mode. */
     if (utf)
       {
-      if (re->first_char < 128)
+      if (re->req_char < 128)
         {
-        if (cd->fcc[re->first_char] != re->first_char)
+        if (cd->fcc[re->req_char] != re->req_char)
           re->flags |= PCRE_RCH_CASELESS;
         }
-      else if ((options & PCRE_UCP) != 0
-          && UCD_OTHERCASE(re->first_char) != re->first_char)
+      else if (UCD_OTHERCASE(re->req_char) != re->req_char)
         re->flags |= PCRE_RCH_CASELESS;
       }
     else
diff --git a/pcre_config.c b/pcre_config.c
index bf42c02..a7792f7 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -62,13 +62,26 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_config(int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_config(int what, void *where)
+#endif
 {
 switch (what)
   {
   case PCRE_CONFIG_UTF8:
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+  *((int *)where) = 1;
+#else
+  *((int *)where) = 0;
+#endif
+  break;
+
+  case PCRE_CONFIG_UTF16:
+#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 1bc96c1..7cceaae 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -3202,7 +3202,7 @@ if (!anchored)
       {
       first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-      if (first_char > 127 && utf && md->use_ucp)
+      if (utf && first_char > 127)
         first_char2 = UCD_OTHERCASE(first_char);
 #endif
       }
@@ -3226,7 +3226,7 @@ if ((re->flags & PCRE_REQCHSET) != 0)
     {
     req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-    if (req_char > 127 && utf && md->use_ucp)
+    if (utf && req_char > 127)
       req_char2 = UCD_OTHERCASE(req_char);
 #endif
     }
diff --git a/pcre_exec.c b/pcre_exec.c
index bb1b60a..5f0a156 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6267,7 +6267,7 @@ if (!anchored)
       {
       first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-      if (first_char > 127 && utf && md->use_ucp)
+      if (utf && first_char > 127)
         first_char2 = UCD_OTHERCASE(first_char);
 #endif
       }
@@ -6289,7 +6289,7 @@ if ((re->flags & PCRE_REQCHSET) != 0)
     {
     req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-    if (req_char > 127 && utf && md->use_ucp)
+    if (utf && req_char > 127)
       req_char2 = UCD_OTHERCASE(req_char);
 #endif
     }
diff --git a/pcre_internal.h b/pcre_internal.h
index 4046e41..b93101f 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1166,7 +1166,12 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
 #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
 #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
 #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR           "UTF8)"
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            "UTF8)"
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            "UTF16)"
+#endif
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
 
@@ -1421,7 +1426,12 @@ only. */
 #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
+#endif
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index df158be..3b85b85 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -297,7 +297,7 @@ typedef struct compiler_common {
   jump_list *casefulcmp;
   jump_list *caselesscmp;
   BOOL jscript_compat;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   BOOL utf;
 #ifdef SUPPORT_UCP
   BOOL use_ucp;
@@ -306,7 +306,7 @@ typedef struct compiler_common {
 #ifdef COMPILE_PCRE8
   jump_list *utfreadtype8;
 #endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 #ifdef SUPPORT_UCP
   jump_list *getucd;
 #endif
@@ -500,7 +500,7 @@ switch(*cc)
   return cc + 1;
 
   case OP_ANYBYTE:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (common->utf) return NULL;
 #endif
   return cc + 1;
@@ -576,6 +576,8 @@ switch(*cc)
 
   case OP_NOTPROP:
   case OP_PROP:
+  return cc + 1 + 2;
+
   case OP_TYPEUPTO:
   case OP_TYPEMINUPTO:
   case OP_TYPEEXACT:
@@ -1267,7 +1269,7 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar*
 /* Detects if the character has an othercase. */
 unsigned int c;
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   GETCHAR(c, cc);
@@ -1279,6 +1281,9 @@ if (common->utf)
     return FALSE;
 #endif
     }
+#ifndef COMPILE_PCRE8
+  return common->fcc[c] != c;
+#endif
   }
 else
 #endif
@@ -1769,6 +1774,9 @@ if (newlinecheck)
   OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
   OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
   COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
+#ifdef COMPILE_PCRE16
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#endif
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
   nl = JUMP(SLJIT_JUMP);
   }
@@ -1776,7 +1784,7 @@ if (newlinecheck)
 mainloop = LABEL();
 
 /* Increasing the STR_PTR here requires one less jump in the most common case. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf) readuchar = TRUE;
 #endif
 if (newlinecheck) readuchar = TRUE;
@@ -1843,7 +1851,7 @@ if (caseless)
   {
   oc = TABLE_GET(first_char, common->fcc, first_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-  if (first_char > 127 && common->utf && common->use_ucp)
+  if (first_char > 127 && common->utf)
     oc = UCD_OTHERCASE(first_char);
 #endif
   }
@@ -2077,7 +2085,7 @@ if (caseless)
   {
   oc = TABLE_GET(req_char, common->fcc, req_char);
 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
-  if (req_char > 127 && common->utf && common->use_ucp)
+  if (req_char > 127 && common->utf)
     oc = UCD_OTHERCASE(req_char);
 #endif
   }
@@ -2265,7 +2273,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2289,7 +2297,7 @@ COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2323,7 +2331,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf)
   {
   COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
@@ -2415,8 +2423,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
 #undef CHAR1
 #undef CHAR2
 
-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
 
 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
 {
@@ -2436,8 +2443,7 @@ while (src1 < end1)
 return src2;
 }
 
-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
 
 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
     compare_context* context, jump_list **fallbacks)
@@ -2445,7 +2451,7 @@ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless,
 DEFINE_COMPILER;
 unsigned int othercasebit = 0;
 pcre_uchar *othercasechar = NULL;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 int utflength;
 #endif
 
@@ -2588,7 +2594,7 @@ do
 #endif
 
   cc++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   utflength--;
   }
 while (utflength > 0);
@@ -2646,7 +2652,7 @@ if ((*cc++ & XCL_MAP) != 0)
   OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
 #ifndef COMPILE_PCRE8
   jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
   if (common->utf)
     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
 #endif
@@ -2660,7 +2666,7 @@ if ((*cc++ & XCL_MAP) != 0)
 
 #ifndef COMPILE_PCRE8
   JUMPHERE(jump);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
   if (common->utf)
     JUMPHERE(jump);
 #endif
@@ -2795,7 +2801,7 @@ while (*cc != XCL_END)
   if (*cc == XCL_SINGLE)
     {
     cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2826,7 +2832,7 @@ while (*cc != XCL_END)
   else if (*cc == XCL_RANGE)
     {
     cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2835,7 +2841,7 @@ while (*cc != XCL_END)
 #endif
       c = *cc++;
     SET_CHAR_OFFSET(c);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (common->utf)
       {
       GETCHARINC(c, cc);
@@ -2963,7 +2969,7 @@ int length;
 unsigned int c, oc, bit;
 compare_context context;
 struct sljit_jump *jump[4];
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 struct sljit_label *label;
 #ifdef SUPPORT_UCP
 pcre_uchar propdata[5];
@@ -3063,7 +3069,7 @@ switch(type)
   OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   return cc;
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 #ifdef SUPPORT_UCP
   case OP_NOTPROP:
   case OP_PROP:
@@ -3279,7 +3285,7 @@ switch(type)
     }
   check_input_end(common, fallbacks);
   read_char(common);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (common->utf)
     {
     GETCHAR(c, cc);
@@ -3296,16 +3302,14 @@ switch(type)
 
   case OP_NOT:
   case OP_NOTI:
+  check_input_end(common, fallbacks);
+  length = 1;
 #ifdef SUPPORT_UTF
   if (common->utf)
     {
-    length = 1;
-    if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
-
-    check_input_end(common, fallbacks);
-    GETCHAR(c, cc);
-
-    if (c <= 127)
+#ifdef COMPILE_PCRE8
+    c = *cc;
+    if (c < 128)
       {
       OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
       if (type == OP_NOT || !char_has_othercase(common, cc))
@@ -3317,24 +3321,24 @@ switch(type)
         add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
         }
       /* Skip the variable-length character. */
-      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
       jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
-#ifdef COMPILE_PCRE8
-      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
-#endif
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
       OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
       JUMPHERE(jump[0]);
-      return cc + length;
+      return cc + 1;
       }
     else
+#endif /* COMPILE_PCRE8 */
+      {
+      GETCHARLEN(c, cc, length);
       read_char(common);
+      }
     }
   else
-#endif
+#endif /* SUPPORT_UTF */
     {
-    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-    add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
-    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+    read_char(common);
     c = *cc;
     }
 
@@ -3363,10 +3367,11 @@ switch(type)
   read_char(common);
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   jump[0] = NULL;
-#ifdef SUPPORT_UTF8
-  /* This check can only be skipped in pure 8 bit mode. */
+#ifdef COMPILE_PCRE8
+  /* This check only affects 8 bit mode. In other modes, we
+  always need to compare the value with 255. */
   if (common->utf)
-#endif
+#endif /* COMPILE_PCRE8 */
     {
     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
     if (type == OP_CLASS)
@@ -3375,7 +3380,7 @@ switch(type)
       jump[0] = NULL;
       }
     }
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
   OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
   OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
   OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
@@ -3385,7 +3390,7 @@ switch(type)
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
   if (jump[0] != NULL)
     JUMPHERE(jump[0]);
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
   return cc + 32 / sizeof(pcre_uchar);
 
 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
@@ -3399,7 +3404,7 @@ switch(type)
   SLJIT_ASSERT(length > 0);
   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (common->utf)
     {
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
@@ -3411,7 +3416,7 @@ switch(type)
     return cc + LINK_SIZE;
     }
 #endif
-  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
+  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
   add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
   return cc + LINK_SIZE;
   }
@@ -3548,8 +3553,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
 if (withchecks && !common->jscript_compat)
   add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
 
-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
 if (common->utf && *cc == OP_REFI)
   {
   SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
@@ -3567,8 +3571,7 @@ if (common->utf && *cc == OP_REFI)
   OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
   }
 else
-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
   {
   OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
   if (withchecks)
@@ -6422,7 +6425,7 @@ common->vspace = NULL;
 common->casefulcmp = NULL;
 common->caselesscmp = NULL;
 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 common->utf = (re->options & PCRE_UTF8) != 0;
 #ifdef SUPPORT_UCP
@@ -6432,7 +6435,7 @@ common->utfreadchar = NULL;
 #ifdef COMPILE_PCRE8
 common->utfreadtype8 = NULL;
 #endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 #ifdef SUPPORT_UCP
 common->getucd = NULL;
 #endif
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 8a03272..e4d2432 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -51,18 +51,35 @@ POSSIBILITY OF SUCH DAMAGE.
 #define PCRE_BUG 0x80000000
 
 /*
- Hungarian utf8 characters
- \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
- \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
- \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
- \xc2\x85 = 0x85 (NExt Line = NEL)
- \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
- \xe2\x80\xa8 = 0x2028 (Line Separator)
- \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
- \xcc\x8d = 781 (Something with Mark property)
+ Letter characters:
+   \xe6\x92\xad = 0x64ad = 25773 (kanji)
+ Non-letter characters:
+   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
+   \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+ Newlines:
+   \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
+   \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
+ Othercase pairs:
+   \xc3\xa9 = 0xe9 = 233 (e')
+      \xc3\x89 = 0xc9 = 201 (E')
+   \xc3\xa1 = 0xe1 = 225 (a')
+      \xc3\x81 = 0xc1 = 193 (A')
+   \xc8\xba = 0x23a = 570
+      \xe2\xb1\xa5 = 0x2c65 = 11365
+   \xe1\xbd\xb8 = 0x1f78 = 8056
+      \xe1\xbf\xb8 = 0x1ff8 = 8184
+   \xf0\x90\x90\x80 = 0x10400 = 66560
+      \xf0\x90\x90\xa8 = 0x10428 = 66600
+ Mark property:
+   \xcc\x8d = 0x30d = 781
+ Special:
+   \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
+   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
+   \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
+   \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
+   \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
 */
 
-static void setstack(pcre_extra *extra);
 static int regression_tests(void);
 
 int main(void)
@@ -76,21 +93,12 @@ int main(void)
 	return regression_tests();
 }
 
-static pcre_jit_stack* callback(void *arg)
-{
-	return (pcre_jit_stack *)arg;
-}
-
-static void setstack(pcre_extra *extra)
-{
-	static pcre_jit_stack *stack;
-	if (stack) pcre_jit_stack_free(stack);
-	stack = pcre_jit_stack_alloc(1, 1024 * 1024);
-	pcre_assign_jit_stack(extra, callback, stack);
-}
-
 /* --------------------------------------------------------------------------------------- */
 
+#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
+#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
+#endif
+
 #define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
 #define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
 #define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
@@ -139,6 +147,10 @@ static struct regression_test_case regression_test_cases[] = {
 	{ CMA, 0, "\\Ca", "CDA" },
 	{ MA, 0, "\\Cx", "cda" },
 	{ CMA, 0, "\\Cx", "CDA" },
+	{ CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+	{ CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+	{ CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+	{ CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
 
 	/* Assertions. */
 	{ MUA, 0, "\\b[^A]", "A_B#" },
@@ -151,6 +163,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
 	{ MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
 	{ MUA, 0, "\\b.", "\xcd\xbe" },
+	{ CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
 	{ MA, 0, "\\R^", "\n" },
 	{ MA, 1, "^", "\n" },
 	{ 0, 0, "^ab", "ab" },
@@ -267,6 +280,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUA, 0, "\\b\\w+\\B", "x,a_cd" },
 	{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
 	{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
+	{ CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
 
 	/* Basic character sets. */
 	{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
@@ -449,6 +463,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
 	{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
 	{ PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
+	{ CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
 
 	/* Assertions. */
 	{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
@@ -601,111 +616,328 @@ static struct regression_test_case regression_test_cases[] = {
 	{ 0, 0, NULL, NULL }
 };
 
+/* Stack callback handed to pcre_assign_jit_stack by setstack: the opaque
+   argument is the JIT stack itself, so it is simply returned with its
+   real type. */
+pcre_jit_stack* callback(void *arg)
+{
+	pcre_jit_stack *stack = (pcre_jit_stack *)arg;
+
+	return stack;
+}
+
+/* Assigns the shared JIT stack to "extra" (extra can be NULL).
+   When "realloc" is non-zero, the previously created stack, if any, is
+   freed and a new one is requested with pcre_jit_stack_alloc(1, 1024 * 1024)
+   before the assignment. The stack is kept in a function-local static, so
+   it is shared by every call. */
+static void setstack(pcre_extra *extra, int realloc)
+{
+	static pcre_jit_stack *shared_stack;
+
+	if (realloc != 0) {
+		if (shared_stack != NULL)
+			pcre_jit_stack_free(shared_stack);
+		shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
+	}
+
+	pcre_assign_jit_stack(extra, callback, shared_stack);
+}
+
+#ifdef SUPPORT_PCRE16
+
+/* Converts the zero terminated UTF-8 string "input" into UTF-16 code units
+   stored in "output", which has room for max_length units including the
+   terminating zero. Characters above 0xffff are stored as surrogate pairs.
+
+   When offsetmap is not NULL it must have room for max_length entries:
+   offsetmap[i] receives the byte offset in "input" of the character whose
+   first code unit is output[i], and the entry at index <returned length>
+   receives the byte offset where the conversion stopped. The slot that
+   belongs to the second half of a surrogate pair is skipped and left
+   unmodified.
+
+   Returns the number of code units stored, not counting the terminator
+   (0, with nothing written, when max_length is not positive). The
+   conversion stops early when the output is full or when a multi-byte
+   sequence is cut short by the end of the input. */
+static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
+{
+	const unsigned char *start = (const unsigned char *)input;
+	const unsigned char *ptr = start;
+	PCRE_SCHAR16 *optr = output;
+	unsigned int c;
+	int len, i;
+
+	if (max_length <= 0)
+		return 0;
+
+	while (*ptr && max_length > 1) {
+		/* The lead byte selects the sequence length and the top bits. */
+		if (!(*ptr & 0x80)) {
+			c = *ptr;
+			len = 1;
+		} else if (!(*ptr & 0x20)) {
+			c = *ptr & 0x1f;
+			len = 2;
+		} else if (!(*ptr & 0x10)) {
+			c = *ptr & 0x0f;
+			len = 3;
+		} else if (!(*ptr & 0x08)) {
+			c = *ptr & 0x07;
+			len = 4;
+		} else {
+			/* No branch above matched: copy the byte as is, so that ptr
+			   always advances instead of emitting zeros until the
+			   output is full. */
+			c = *ptr;
+			len = 1;
+		}
+
+		/* Never read past the terminating zero of a truncated sequence. */
+		for (i = 1; i < len && ptr[i]; i++)
+			c = (c << 6) | (ptr[i] & 0x3f);
+		if (i < len)
+			break;
+
+		/* A surrogate pair needs two free units besides the terminator. */
+		if (c >= 0x10000 && max_length <= 2)
+			break;
+
+		if (offsetmap)
+			*offsetmap++ = (int)(ptr - start);
+		ptr += len;
+
+		if (c < 0x10000) {
+			*optr++ = (PCRE_SCHAR16)c;
+			max_length--;
+		} else {
+			c -= 0x10000;
+			*optr++ = (PCRE_SCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
+			*optr++ = (PCRE_SCHAR16)(0xdc00 | (c & 0x3ff));
+			max_length -= 2;
+			/* Skip the map slot of the low surrogate. */
+			if (offsetmap)
+				offsetmap++;
+		}
+	}
+	if (offsetmap)
+		*offsetmap = (int)(ptr - start);
+	*optr = 0;
+	return (int)(optr - output);
+}
+
+/* Copies the zero terminated 8 bit string "input" into "output", widening
+   each byte to one 16 bit unit without any UTF decoding. At most
+   max_length - 1 units are copied and a terminating zero is appended
+   whenever max_length is positive.
+
+   Returns the number of units stored, not counting the terminator
+   (0, with nothing written, when max_length is not positive). */
+static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
+{
+	/* Read the input as unsigned bytes: where plain char is signed, a byte
+	   above 0x7f would otherwise be sign extended when it is widened. */
+	const unsigned char *iptr = (const unsigned char *)input;
+	PCRE_SCHAR16 *optr = output;
+
+	if (max_length <= 0)
+		return 0;
+
+	while (*iptr && max_length > 1) {
+		*optr++ = *iptr++;
+		max_length--;
+	}
+	*optr = 0;
+	return (int)(optr - output);
+}
+
+#define REGTEST_MAX_LENGTH 4096 /* Number of elements in each of the two arrays below. */
+static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH]; /* 16 bit copy of the current pattern, then reused for the current input. */
+static int regtest_offsetmap[REGTEST_MAX_LENGTH]; /* Filled by convert_utf8_to_utf16: 16 bit unit index -> byte offset in the UTF-8 input. */
+
+#endif /* SUPPORT_PCRE16 */
+
 static int regression_tests(void)
 {
-	pcre *re;
 	struct regression_test_case *current = regression_test_cases;
 	const char *error;
-	pcre_extra *extra;
-	int utf8 = 0, ucp = 0;
-	int ovector1[32];
-	int ovector2[32];
-	int return_value1, return_value2;
-	int i, err_offs;
-	int total = 0, succesful = 0;
+	int i, err_offs, is_succesful;
+	int total = 0;
+	int succesful = 0;
 	int counter = 0;
-	int disabled_flags = PCRE_BUG;
+#ifdef SUPPORT_PCRE8
+	pcre *re8;
+	pcre_extra *extra8;
+	int ovector8_1[32];
+	int ovector8_2[32];
+	int return_value8_1, return_value8_2;
+	int utf8 = 0, ucp8 = 0;
+	int disabled_flags8 = PCRE_BUG;
+#endif
+#ifdef SUPPORT_PCRE16
+	pcre *re16;
+	pcre_extra *extra16;
+	int ovector16_1[32];
+	int ovector16_2[32];
+	int return_value16_1, return_value16_2;
+	int utf16 = 0, ucp16 = 0;
+	int disabled_flags16 = PCRE_BUG;
+	int length16;
+#endif
 
 	/* This test compares the behaviour of interpreter and JIT. Although disabling
-	utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
+	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
 	still considered successful from pcre_jit_test point of view. */
 
+	printf("Running JIT regression\n");
+
+#ifdef SUPPORT_PCRE8
 	pcre_config(PCRE_CONFIG_UTF8, &utf8);
-	pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+	pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
 	if (!utf8)
-		disabled_flags |= PCRE_UTF8;
-	if (!ucp)
-		disabled_flags |= PCRE_UCP;
+		disabled_flags8 |= PCRE_UTF8;
+	if (!ucp8)
+		disabled_flags8 |= PCRE_UCP;
+	printf(" in  8 bit mode with utf8  %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
+#endif
+#ifdef SUPPORT_PCRE16
+	pcre16_config(PCRE_CONFIG_UTF16, &utf16);
+	pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
+	if (!utf16)
+		disabled_flags16 |= PCRE_UTF8;
+	if (!ucp16)
+		disabled_flags16 |= PCRE_UCP;
+	printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
+#endif
 
-	printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
 	while (current->pattern) {
 		/* printf("\nPattern: %s :\n", current->pattern); */
 		total++;
 
 		error = NULL;
-		re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
-
-		if (!re) {
-			if (utf8 && ucp)
-				printf("\nCannot compile pattern: %s\n", current->pattern);
-			else {
-				/* Some patterns cannot be compiled when either of utf8
-				or ucp is disabled. We just skip them. */
-				printf(".");
-				succesful++;
+#ifdef SUPPORT_PCRE8
+		re8 = pcre_compile(current->pattern,
+			current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+			&error, &err_offs, NULL);
+
+		if (re8) {
+			error = NULL;
+			extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
+			if (!extra8) {
+				printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
+				pcre_free(re8);
+				re8 = NULL;
 			}
-			current++;
-			continue;
-		}
-
-		error = NULL;
-		extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
-		if (!extra) {
-			printf("\nCannot study pattern: %s\n", current->pattern);
-			current++;
-			continue;
-		}
-
-		if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
-			printf("\nJIT compiler does not support: %s\n", current->pattern);
-			current++;
-			continue;
-		}
+			if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
+				printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
+				pcre_free_study(extra8);
+				pcre_free(re8);
+				re8 = NULL;
+			}
+		} else if (utf8 && ucp8)
+			printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif
+#ifdef SUPPORT_PCRE16
+		convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
+		re16 = pcre16_compile(regtest_buf,
+			current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+			&error, &err_offs, NULL);
+		if (re16) {
+			error = NULL;
+			extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
+			if (!extra16) {
+				printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
+				pcre_free(re16);
+				re16 = NULL;
+			}
+			if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
+				printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
+				pcre_free_study(extra16);
+				pcre_free(re16);
+				re16 = NULL;
+			}
+		} else if (utf16 && ucp16)
+			printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif
 
 		counter++;
 		if ((counter & 0x3) != 0)
-			setstack(extra);
-
-		for (i = 0; i < 32; ++i)
-			ovector1[i] = -2;
-		return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
+			setstack(NULL, 1);
+
+#ifdef SUPPORT_PCRE8
+		if (re8) {
+			setstack(extra8, 0);
+			for (i = 0; i < 32; ++i)
+				ovector8_1[i] = -2;
+			return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
+
+			for (i = 0; i < 32; ++i)
+				ovector8_2[i] = -2;
+			return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
+		}
+#endif
 
-		for (i = 0; i < 32; ++i)
-			ovector2[i] = -2;
-		return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
+#ifdef SUPPORT_PCRE16
+		if (re16) {
+			setstack(extra16, 0);
+			if (current->flags & PCRE_UTF8)
+				length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
+			else
+				length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
+
+			for (i = 0; i < 32; ++i)
+				ovector16_1[i] = -2;
+			return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
+
+			for (i = 0; i < 32; ++i)
+				ovector16_2[i] = -2;
+			return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
+		}
+#endif
 
 		/* If PCRE_BUG is set, just run the test, but do not compare the results.
 		Segfaults can still be captured. */
-		if (!(current->flags & PCRE_BUG)) {
-			if (return_value1 != return_value2) {
-				printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
-				current++;
-				continue;
-			}
 
-			if (return_value1 >= 0) {
-				return_value1 *= 2;
-				err_offs = 0;
-				for (i = 0; i < return_value1; ++i)
-					if (ovector1[i] != ovector2[i]) {
-						printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
-						err_offs = 1;
+		is_succesful = 1;
+		if (!(current->flags & PCRE_BUG)) {
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+			if ((current->flags & PCRE_UTF8) && utf8 && utf16) {
+				/* All results must be the same. */
+				if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
+					printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
+						return_value8_1, return_value8_2, return_value16_1, return_value16_2,
+						total, current->pattern, current->input);
+					is_succesful = 0;
+				} else if (return_value8_1 >= 0) {
+					return_value8_1 *= 2;
+					/* Transform back the results. */
+					for (i = 0; i < return_value8_1; ++i) {
+						if (ovector16_1[i] >= 0)
+							ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
+						if (ovector16_2[i] >= 0)
+							ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
 					}
-				if (err_offs) {
-					current++;
-					continue;
+
+					for (i = 0; i < return_value8_1; ++i)
+						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
+							printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
+								i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
+								total, current->pattern, current->input);
+							is_succesful = 0;
+						}
+				}
+			} else {
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
+				/* Within each bit width, only the JIT and the interpreter results must be equal. */
+#ifdef SUPPORT_PCRE8
+				if (return_value8_1 != return_value8_2) {
+					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+						return_value8_1, return_value8_2, total, current->pattern, current->input);
+					is_succesful = 0;
+				} else if (return_value8_1 >= 0) {
+					return_value8_1 *= 2;
+					for (i = 0; i < return_value8_1; ++i)
+						if (ovector8_1[i] != ovector8_2[i]) {
+							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+								i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
+							is_succesful = 0;
+						}
 				}
+#endif
+
+#ifdef SUPPORT_PCRE16
+				if (return_value16_1 != return_value16_2) {
+					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+						return_value16_1, return_value16_2, total, current->pattern, current->input);
+					is_succesful = 0;
+				} else if (return_value16_1 >= 0) {
+					return_value16_1 *= 2;
+					for (i = 0; i < return_value16_1; ++i)
+						if (ovector16_1[i] != ovector16_2[i]) {
+							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+								i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
+							is_succesful = 0;
+						}
+				}
+#endif
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
 			}
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
 		}
 
-		pcre_free_study(extra);
-		pcre_free(re);
+		if (is_succesful)
+			succesful++;
+
+#ifdef SUPPORT_PCRE8
+		if (re8) {
+			pcre_free_study(extra8);
+			pcre_free(re8);
+		}
+#endif
+#ifdef SUPPORT_PCRE16
+		if (re16) {
+			pcre16_free_study(extra16);
+			pcre_free(re16);
+		}
+#endif
 
 		/* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
 		printf(".");
 		fflush(stdout);
 		current++;
-		succesful++;
 	}
 
 	if (total == succesful) {
@@ -717,4 +949,5 @@ static int regression_tests(void)
 	}
 }
 
+
 /* End of pcre_jit_test.c */
author	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-06 11:33:41 +0000
committer	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-06 11:33:41 +0000
commit	b4a0233a732c67c98886725229df86fc150b0e82 (patch)
tree	e0a12eaa6f6f8aedd3f5b76969cb0a12a78f1341
parent	a9839b968cee5828bf35dbcb05a31859a49ab7a2 (diff)
download	pcre-b4a0233a732c67c98886725229df86fc150b0e82.tar.gz