Extend the 16-bit API, add a compile-mode check (PCRE_ERROR_BADMODE), and apply assorted fixes

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@795 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-10 02:20:06 +0000
committer: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-10 02:20:06 +0000
commit: 669e6f0bbc3b07f6df3b0d0cafba3555e39e433c (patch)
tree: 37c97c3fa732981cf8d2dbed54d27cca37fa8fac
parent: 24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (diff)
download: pcre-669e6f0bbc3b07f6df3b0d0cafba3555e39e433c.tar.gz
20 files changed, 484 insertions, 154 deletions
diff --git a/Makefile.am b/Makefile.am
index b64ccd5..ac2c675 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -212,8 +212,10 @@ libpcre16_la_SOURCES = \
   pcre16_chartables.c \
   pcre16_compile.c \
   pcre16_config.c \
+  pcre16_dfa_exec.c \
   pcre16_exec.c \
   pcre16_fullinfo.c \
+  pcre16_get.c \
   pcre16_info.c \
   pcre16_jit_compile.c \
   pcre16_newline.c \
diff --git a/pcre.h.in b/pcre.h.in
index ea11766..e83f2cf 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -166,6 +166,7 @@ compile-time only bits for runtime options, or vice versa. */
 #define PCRE_ERROR_SHORTUTF8      (-25)
 #define PCRE_ERROR_RECURSELOOP    (-26)
 #define PCRE_ERROR_JIT_STACKLIMIT (-27)
+#define PCRE_ERROR_BADMODE        (-28)
 
 /* Specific error codes for UTF-8 validity checks */
 
@@ -357,29 +358,46 @@ PCRE_EXP_DECL int  pcre_config(int, void *);
 PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                   int *, int, const char *, char *, int);
-PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
-                  int);
+PCRE_EXP_DECL int  pcre16_copy_named_substring(const pcre *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SCHAR16 *, int);
+PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int,
+                  char *, int);
+PCRE_EXP_DECL int  pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SCHAR16 *, int);
 PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
                   const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int  pcre16_dfa_exec(const pcre *, const pcre_extra *,
+                  PCRE_SPTR16, int, int, int, int *, int , int *, int);
 PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
                    int, int, int, int *, int);
 PCRE_EXP_DECL int  pcre16_exec(const pcre *, const pcre_extra *, PCRE_SPTR16,
                    int, int, int, int *, int);
 PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
                   void *);
 PCRE_EXP_DECL int  pcre16_fullinfo(const pcre *, const pcre_extra *, int,
                   void *);
 PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
                   int *, int, const char *, const char **);
+PCRE_EXP_DECL int  pcre16_get_named_substring(const pcre *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int  pcre16_get_stringnumber(const pcre *, PCRE_SPTR16);
 PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
                   char **, char **);
+PCRE_EXP_DECL int  pcre16_get_stringtable_entries(const pcre *, PCRE_SPTR16,
+                  PCRE_SCHAR16 **, PCRE_SCHAR16 **);
 PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
                   const char **);
+PCRE_EXP_DECL int  pcre16_get_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
                   const char ***);
+PCRE_EXP_DECL int  pcre16_get_substring_list(PCRE_SPTR16, int *, int,
+                  PCRE_SPTR16 **);
 PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
 PCRE_EXP_DECL int  pcre16_info(const pcre *, int *, int *);
 PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
diff --git a/pcre16_dfa_exec.c b/pcre16_dfa_exec.c
new file mode 100644
index 0000000..dc6ea49
--- /dev/null
+++ b/pcre16_dfa_exec.c
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_dfa_exec.c"
+
+/* End of pcre16_dfa_exec.c */
diff --git a/pcre16_get.c b/pcre16_get.c
new file mode 100644
index 0000000..0b9bd61
--- /dev/null
+++ b/pcre16_get.c
@@ -0,0 +1,45 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+           Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_get.c"
+
+/* End of pcre16_get.c */
diff --git a/pcre16_ord2utf16.c b/pcre16_ord2utf16.c
index 99bed29..c0d3ee5 100644
--- a/pcre16_ord2utf16.c
+++ b/pcre16_ord2utf16.c
@@ -87,7 +87,7 @@ return 2;
 
 #else /* SUPPORT_UTF */
 (void)(cvalue);  /* Keep compiler happy; this function won't ever be */
-(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer);  /* called when SUPPORT_UTF is not defined. */
 return 0;
 #endif /* SUPPORT_UTF */
 }
diff --git a/pcre_compile.c b/pcre_compile.c
index 24a7b1c..3fa7c67 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -2357,7 +2357,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
     actual length is stored in the compiled code, so we must update "code"
     here. */
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
     ccode = code += GET(code, 1);
     goto CHECK_CLASS_REPEAT;
@@ -2367,7 +2367,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
     case OP_NCLASS:
     ccode = code + PRIV(OP_lengths)[OP_CLASS];
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     CHECK_CLASS_REPEAT:
 #endif
 
@@ -2980,7 +2980,7 @@ the next item is a character. */
 if (next >= 0) switch(op_code)
   {
   case OP_CHAR:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
@@ -2992,13 +2992,13 @@ if (next >= 0) switch(op_code)
   high-valued characters. */
 
   case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
 #endif
   if (c == next) return FALSE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (utf)
     {
     unsigned int othercase;
@@ -3011,7 +3011,7 @@ if (next >= 0) switch(op_code)
     return (unsigned int)c != othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
 
   /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
@@ -3023,7 +3023,7 @@ if (next >= 0) switch(op_code)
 
   case OP_NOTI:
   if ((c = *previous) == next) return TRUE;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   if (utf)
     {
     unsigned int othercase;
@@ -3036,7 +3036,7 @@ if (next >= 0) switch(op_code)
     return (unsigned int)c == othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
 
   /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
@@ -3128,7 +3128,7 @@ switch(op_code)
   {
   case OP_CHAR:
   case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
@@ -3358,7 +3358,7 @@ pcre_uint8 classbits[32];
 must not do this for other options (e.g. PCRE_EXTENDED) because they may change
 dynamically as we process the pattern. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 BOOL utf = (options & PCRE_UTF8) != 0;
 pcre_uchar utf_chars[6];
@@ -4150,7 +4150,7 @@ for (;; ptr++)
           goto LONE_SINGLE_CHARACTER;
           }
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
         if (utf)
           {                           /* Braces are required because the */
           GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
@@ -4200,7 +4200,9 @@ for (;; ptr++)
         matching for characters > 127 is available only if UCP support is
         available. */
 
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+        if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
+#elif defined  SUPPORT_UTF
         if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
 #elif !(defined COMPILE_PCRE8)
         if (d > 255)
@@ -4214,7 +4216,11 @@ for (;; ptr++)
           they fit with the basic range. */
 
 #ifdef SUPPORT_UCP
+#ifndef COMPILE_PCRE8
+          if (utf && (options & PCRE_CASELESS) != 0)
+#else
           if ((options & PCRE_CASELESS) != 0)
+#endif
             {
             unsigned int occ, ocd;
             unsigned int cc = c;
@@ -4257,12 +4263,25 @@ for (;; ptr++)
 
           *class_uchardata++ = XCL_RANGE;
 #ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+          if (utf)
+            {
+            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
+            }
+          else
+            {
+            *class_uchardata++ = c;
+            *class_uchardata++ = d;
+            }
+#else
           class_uchardata += PRIV(ord2utf)(c, class_uchardata);
           class_uchardata += PRIV(ord2utf)(d, class_uchardata);
-#else
+#endif
+#else /* SUPPORT_UTF */
           *class_uchardata++ = c;
           *class_uchardata++ = d;
-#endif
+#endif /* SUPPORT_UTF */
 
           /* With UCP support, we are done. Without UCP support, there is no
           caseless matching for UTF characters > 127; we can use the bit map
@@ -4270,9 +4289,26 @@ for (;; ptr++)
           can still use  */
 
 #ifdef SUPPORT_UCP
-          continue;    /* With next character in the class */
-#else
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+          if (utf)
+#endif
+            continue;    /* With next character in the class */
+#endif  /* SUPPORT_UCP */
+
+#if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
+          if (utf)
+            {
+            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 127;
+            }
+          else
+            {
+            if (c > 255) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 255;
+            }
+#elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
           if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
           /* Adjust upper limit and fall through to set up the map */
           d = 127;
@@ -4280,10 +4316,9 @@ for (;; ptr++)
           if (c > 255) continue;
           /* Adjust upper limit and fall through to set up the map */
           d = 255;
-#endif  /* SUPPORT_UTF */
-#endif  /* SUPPORT_UCP */
+#endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
           }
-#endif  /* SUPPORT_UTF8 || COMPILE_PCRE16 */
+#endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
 
         /* We use the bit map for 8 bit mode, or when the characters fall
         partially or entirely to [0-255] ([0-127] for UCP) ranges. */
@@ -4314,7 +4349,9 @@ for (;; ptr++)
 
       /* Handle a character that cannot go in the bit map */
 
-#ifdef SUPPORT_UTF
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+      if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
+#elif defined SUPPORT_UTF
       if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
 #elif !(defined COMPILE_PCRE8)
       if (c > 255)
@@ -4324,13 +4361,26 @@ for (;; ptr++)
         xclass = TRUE;
         *class_uchardata++ = XCL_SINGLE;
 #ifdef SUPPORT_UTF
-        class_uchardata += PRIV(ord2utf)(c, class_uchardata);
-#else
-        *class_uchardata++ = c;
+#ifndef COMPILE_PCRE8
+        /* In non 8 bit mode, we can get here even
+        if we are not in UTF mode. */
+        if (!utf)
+          *class_uchardata++ = c;
+        else
 #endif
+          class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+#else /* SUPPORT_UTF */
+        *class_uchardata++ = c;
+#endif /* SUPPORT_UTF */
 
 #ifdef SUPPORT_UCP
+#ifdef COMPILE_PCRE8
         if ((options & PCRE_CASELESS) != 0)
+#else
+        /* In non 8 bit mode, we can get here even
+        if we are not in UTF mode. */
+        if (utf && (options & PCRE_CASELESS) != 0)
+#endif
           {
           unsigned int othercase;
           if ((othercase = UCD_OTHERCASE(c)) != c)
@@ -4415,7 +4465,7 @@ for (;; ptr++)
       /* For a single, positive character, get the value into mcbuffer, and
       then we can handle this with the normal one-character code. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf && class_lastchar > 127)
         mclength = PRIV(ord2utf)(class_lastchar, mcbuffer);
       else
@@ -4843,7 +4893,7 @@ for (;; ptr++)
 
     else if (*previous == OP_CLASS ||
              *previous == OP_NCLASS ||
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
              *previous == OP_XCLASS ||
 #endif
              *previous == OP_REF ||
@@ -6635,7 +6685,7 @@ for (;; ptr++)
     a value > 127. We set its representation in the length/buffer, and then
     handle it as a data character. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (utf && c > 127)
       mclength = PRIV(ord2utf)(c, mcbuffer);
     else
@@ -7471,12 +7521,12 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 utf = (options & PCRE_UTF8) != 0;
 
-/* Can't support UTF8 unless PCRE has been compiled to include the code. The
+/* Can't support UTF unless PCRE has been compiled to include the code. The
 return of an error code from PRIV(valid_utf)() is a new feature, introduced in
 release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
 not used here. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
      (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
   {
@@ -7673,7 +7723,7 @@ code = (pcre_uchar *)codestart;
   &firstchar, &reqchar, NULL, cd, NULL);
 re->top_bracket = cd->bracount;
 re->top_backref = cd->top_backref;
-re->flags = cd->external_flags;
+re->flags = cd->external_flags | PCRE_MODE;
 
 if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
 
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index a5bc745..58197ce 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -413,7 +413,7 @@ const pcre_uchar *start_subject = md->start_subject;
 const pcre_uchar *end_subject = md->end_subject;
 const pcre_uchar *start_code = md->start_code;
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 BOOL utf = (md->poptions & PCRE_UTF8) != 0;
 #else
 BOOL utf = FALSE;
@@ -471,7 +471,7 @@ if (*first_op == OP_REVERSE)
   /* If we can't go back the amount required for the longest lookbehind
   pattern, go back as far as we can; some alternatives may still be viable. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   /* In character mode we have to step back character by character */
 
   if (utf)
@@ -603,9 +603,9 @@ for (;;)
   if (ptr < end_subject)
     {
     clen = 1;        /* Number of bytes in the character */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     if (utf) { GETCHARLEN(c, ptr, clen); } else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
     c = *ptr;
     }
   else
@@ -692,9 +692,9 @@ for (;;)
     if (coptable[codevalue] > 0)
       {
       dlen = 1;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
       d = code[coptable[codevalue]];
       if (codevalue >= OP_TYPESTAR)
         {
@@ -957,8 +957,8 @@ for (;;)
           {
           const pcre_uchar *temp = ptr - 1;
           if (temp < md->start_used_ptr) md->start_used_ptr = temp;
-#ifdef SUPPORT_UTF8
-          if (utf) BACKCHAR(temp);
+#ifdef SUPPORT_UTF
+          if (utf) { BACKCHAR(temp); }
 #endif
           GETCHARTEST(d, temp);
 #ifdef SUPPORT_UCP
@@ -1983,28 +1983,28 @@ for (;;)
       case OP_CHARI:
       if (clen == 0) break;
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
       if (utf)
         {
         if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
           {
           unsigned int othercase;
-          if (c < 128) othercase = fcc[c]; else
-
-          /* If we have Unicode property support, we can use it to test the
-          other case of the character. */
-
+          if (c < 128)
+            othercase = fcc[c];
+          else
+            /* If we have Unicode property support, we can use it to test the
+            other case of the character. */
 #ifdef SUPPORT_UCP
-          othercase = UCD_OTHERCASE(c);
+            othercase = UCD_OTHERCASE(c);
 #else
-          othercase = NOTACHAR;
+            othercase = NOTACHAR;
 #endif
 
           if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
           }
         }
       else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
       /* Not UTF mode */
         {
         if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
@@ -2207,7 +2207,7 @@ for (;;)
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2215,7 +2215,7 @@ for (;;)
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2254,7 +2254,7 @@ for (;;)
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2262,7 +2262,7 @@ for (;;)
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2299,7 +2299,7 @@ for (;;)
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2307,7 +2307,7 @@ for (;;)
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2336,7 +2336,7 @@ for (;;)
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2344,7 +2344,7 @@ for (;;)
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2380,7 +2380,7 @@ for (;;)
         unsigned int otherd = NOTACHAR;
         if (caseless)
           {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
           if (utf && d >= 128)
             {
 #ifdef SUPPORT_UCP
@@ -2388,7 +2388,7 @@ for (;;)
 #endif  /* SUPPORT_UCP */
             }
           else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
           otherd = fcc[d];
           }
         if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
@@ -2438,7 +2438,7 @@ for (;;)
         else
          {
          ecode = code + GET(code, 1);
-         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE);
+         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
          }
 
         /* At this point, isinclass is set for all kinds of class, and ecode
@@ -2994,10 +2994,17 @@ Returns:          > 0 => number of match offset pairs placed in offsets
                  < -1 => some kind of unexpected problem
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
   const char *subject, int length, int start_offset, int options, int *offsets,
   int offsetcount, int *workspace, int wscount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
+  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+  int offsetcount, int *workspace, int wscount)
+#endif
 {
 real_pcre *re = (real_pcre *)argument_re;
 dfa_match_data match_block;
@@ -3062,14 +3069,15 @@ if (re->magic_number != MAGIC_NUMBER)
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   if (study != NULL) study = &internal_study;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 
 /* Set some local values */
 
-current_subject = (const unsigned char *)subject + start_offset;
-end_subject = (const unsigned char *)subject + length;
+current_subject = (const pcre_uchar *)subject + start_offset;
+end_subject = (const pcre_uchar *)subject + length;
 req_char_ptr = current_subject - 1;
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
 utf = (re->options & PCRE_UTF8) != 0;
 #else
@@ -3083,7 +3091,7 @@ anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
 
 md->start_code = (const pcre_uchar *)argument_re +
     re->name_table_offset + re->name_count * re->name_entry_size;
-md->start_subject = (const unsigned char *)subject;
+md->start_subject = (const pcre_uchar *)subject;
 md->end_subject = end_subject;
 md->start_offset = start_offset;
 md->moptions = options;
diff --git a/pcre_exec.c b/pcre_exec.c
index c5932f7..9aa07a7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -2968,7 +2968,7 @@ for (;;)
           MRRETURN(MATCH_NOMATCH);
           }
         GETCHARINCTEST(c, eptr);
-        if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+        if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
         }
 
       /* If max == min we can continue with the main loop without the
@@ -2992,7 +2992,7 @@ for (;;)
             MRRETURN(MATCH_NOMATCH);
             }
           GETCHARINCTEST(c, eptr);
-          if (!PRIV(xclass)(c, data)) MRRETURN(MATCH_NOMATCH);
+          if (!PRIV(xclass)(c, data, utf)) MRRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -3015,7 +3015,7 @@ for (;;)
 #else
           c = *eptr;
 #endif
-          if (!PRIV(xclass)(c, data)) break;
+          if (!PRIV(xclass)(c, data, utf)) break;
           eptr += len;
           }
         for(;;)
@@ -6113,6 +6113,7 @@ if (re->magic_number != MAGIC_NUMBER)
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   if (study != NULL) study = &internal_study;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 
 /* Set up other data */
 
diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c
index 2bdf24b..078f5fd 100644
--- a/pcre_fullinfo.c
+++ b/pcre_fullinfo.c
@@ -91,6 +91,7 @@ if (re->magic_number != MAGIC_NUMBER)
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   if (study != NULL) study = &internal_study;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 
 switch (what)
   {
diff --git a/pcre_get.c b/pcre_get.c
index 330035f..10a54a7 100644
--- a/pcre_get.c
+++ b/pcre_get.c
@@ -65,8 +65,13 @@ Returns:      the number of the named parentheses, or a negative number
                 (PCRE_ERROR_NOSUBSTRING) if not found
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringnumber(const pcre *code, const char *stringname)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringnumber(const pcre *code, PCRE_SPTR16 stringname)
+#endif
 {
 int rc;
 int entrysize;
@@ -87,7 +92,8 @@ while (top > bot)
   {
   int mid = (top + bot) / 2;
   pcre_uchar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
   if (c == 0) return (entry[0] << 8) + entry[1];
   if (c > 0) bot = mid + 1; else top = mid;
   }
@@ -114,9 +120,15 @@ Returns:      the length of each entry, or a negative number
                 (PCRE_ERROR_NOSUBSTRING) if not found
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
   char **firstptr, char **lastptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_stringtable_entries(const pcre *code, PCRE_SPTR16 stringname,
+  PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
+#endif
 {
 int rc;
 int entrysize;
@@ -138,23 +150,31 @@ while (top > bot)
   {
   int mid = (top + bot) / 2;
   pcre_uchar *entry = nametable + entrysize*mid;
-  int c = strcmp(stringname, (char *)(entry + 2));
+  int c = STRCMP_UC_UC((pcre_uchar *)stringname,
+    (pcre_uchar *)(entry + IMM2_SIZE));
   if (c == 0)
     {
     pcre_uchar *first = entry;
     pcre_uchar *last = entry;
     while (first > nametable)
       {
-      if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
       first -= entrysize;
       }
     while (last < lastentry)
       {
-      if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
+      if (STRCMP_UC_UC((pcre_uchar *)stringname,
+        (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
       last += entrysize;
       }
+#ifdef COMPILE_PCRE8
     *firstptr = (char *)first;
     *lastptr = (char *)last;
+#else
+    *firstptr = (PCRE_SCHAR16 *)first;
+    *lastptr = (PCRE_SCHAR16 *)last;
+#endif
     return entrysize;
     }
   if (c > 0) bot = mid + 1; else top = mid;
@@ -182,16 +202,29 @@ Returns:       the number of the first that is set,
                or a negative number on error
 */
 
+#ifdef COMPILE_PCRE8
 static int
 get_first_set(const pcre *code, const char *stringname, int *ovector)
+#else
+static int
+get_first_set(const pcre *code, PCRE_SPTR16 stringname, int *ovector)
+#endif
 {
 const real_pcre *re = (const real_pcre *)code;
 int entrysize;
-char *first, *last;
+pcre_uchar *first, *last;
 pcre_uchar *entry;
+#ifdef COMPILE_PCRE8
 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
   return pcre_get_stringnumber(code, stringname);
-entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
+entrysize = pcre_get_stringtable_entries(code, stringname,
+  (char **)&first, (char **)&last);
+#else
+if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
+  return pcre16_get_stringnumber(code, stringname);
+entrysize = pcre16_get_stringtable_entries(code, stringname,
+  (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
+#endif
 if (entrysize <= 0) return entrysize;
 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
   {
@@ -231,9 +264,15 @@ Returns:         if successful:
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
   int stringnumber, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  int stringnumber, PCRE_SCHAR16 *buffer, int size)
+#endif
 {
 int yield;
 if (stringnumber < 0 || stringnumber >= stringcount)
@@ -241,7 +280,7 @@ if (stringnumber < 0 || stringnumber >= stringcount)
 stringnumber *= 2;
 yield = ovector[stringnumber+1] - ovector[stringnumber];
 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
-memcpy(buffer, subject + ovector[stringnumber], yield);
+memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
 buffer[yield] = 0;
 return yield;
 }
@@ -276,13 +315,23 @@ Returns:         if successful:
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
   int stringcount, const char *stringname, char *buffer, int size)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_copy_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+  int stringcount, PCRE_SPTR16 stringname, PCRE_SCHAR16 *buffer, int size)
+#endif
 {
 int n = get_first_set(code, stringname, ovector);
 if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#else
+return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
+#endif
 }
 
 
@@ -308,29 +357,39 @@ Returns:         if successful: 0
                    PCRE_ERROR_NOMEMORY (-6) failed to get store
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
   const char ***listptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  PCRE_SPTR16 **listptr)
+#endif
 {
 int i;
-int size = sizeof(char *);
+int size = sizeof(pcre_uchar *);
 int double_count = stringcount * 2;
-char **stringlist;
-char *p;
+pcre_uchar **stringlist;
+pcre_uchar *p;
 
 for (i = 0; i < double_count; i += 2)
-  size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
+  size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
 
-stringlist = (char **)(pcre_malloc)(size);
+stringlist = (pcre_uchar **)(pcre_malloc)(size);
 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
 
+#ifdef COMPILE_PCRE8
 *listptr = (const char **)stringlist;
-p = (char *)(stringlist + stringcount + 1);
+#else
+*listptr = (PCRE_SPTR16 *)stringlist;
+#endif
+p = (pcre_uchar *)(stringlist + stringcount + 1);
 
 for (i = 0; i < double_count; i += 2)
   {
   int len = ovector[i+1] - ovector[i];
-  memcpy(p, subject + ovector[i], len);
+  memcpy(p, subject + ovector[i], IN_UCHARS(len));
   *stringlist++ = p;
   p += len;
   *p++ = 0;
@@ -353,8 +412,13 @@ Argument:   the result of a previous pcre_get_substring_list()
 Returns:    nothing
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring_list(const char **pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring_list(PCRE_SPTR16 *pointer)
+#endif
 {
 (pcre_free)((void *)pointer);
 }
@@ -386,21 +450,31 @@ Returns:         if successful:
                    PCRE_ERROR_NOSUBSTRING (-7) substring not present
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_substring(const char *subject, int *ovector, int stringcount,
   int stringnumber, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
+  int stringnumber, PCRE_SPTR16 *stringptr)
+#endif
 {
 int yield;
-char *substring;
+pcre_uchar *substring;
 if (stringnumber < 0 || stringnumber >= stringcount)
   return PCRE_ERROR_NOSUBSTRING;
 stringnumber *= 2;
 yield = ovector[stringnumber+1] - ovector[stringnumber];
-substring = (char *)(pcre_malloc)(yield + 1);
+substring = (pcre_uchar *)(pcre_malloc)(IN_UCHARS(yield + 1));
 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
-memcpy(substring, subject + ovector[stringnumber], yield);
+memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
 substring[yield] = 0;
-*stringptr = substring;
+#ifdef COMPILE_PCRE8
+*stringptr = (const char *)substring;
+#else
+*stringptr = (PCRE_SPTR16)substring;
+#endif
 return yield;
 }
 
@@ -433,13 +507,23 @@ Returns:         if successful:
                    PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
   int stringcount, const char *stringname, const char **stringptr)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_get_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
+  int stringcount, PCRE_SPTR16 stringname, PCRE_SPTR16 *stringptr)
+#endif
 {
 int n = get_first_set(code, stringname, ovector);
 if (n <= 0) return n;
+#ifdef COMPILE_PCRE8
 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
+#else
+return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
+#endif
 }
 
 
@@ -456,8 +540,13 @@ Argument:   the result of a previous pcre_get_substring()
 Returns:    nothing
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
 pcre_free_substring(const char *pointer)
+#else
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
+pcre16_free_substring(PCRE_SPTR16 pointer)
+#endif
 {
 (pcre_free)((void *)pointer);
 }
diff --git a/pcre_info.c b/pcre_info.c
index e7b3730..52d593a 100644
--- a/pcre_info.c
+++ b/pcre_info.c
@@ -88,6 +88,7 @@ if (re->magic_number != MAGIC_NUMBER)
   re = PRIV(try_flipped)(re, &internal_re, NULL, NULL);
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   }
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
 if (first_char != NULL)
   *first_char = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
diff --git a/pcre_internal.h b/pcre_internal.h
index e748809..9a20e73 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -832,15 +832,21 @@ are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
 the restrictions on partial matching have been lifted. It remains for backwards
 compatibility. */
 
-#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
-#define PCRE_FIRSTSET      0x0002  /* first_char is set */
-#define PCRE_REQCHSET      0x0004  /* req_byte is set */
-#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
-#define PCRE_JCHANGED      0x0010  /* j option used in regex */
-#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN       0x0040  /* pattern contains (*THEN) */
-#define PCRE_FCH_CASELESS  0x0080  /* caseless first char */
-#define PCRE_RCH_CASELESS  0x0100  /* caseless requested char */
+#ifdef COMPILE_PCRE8
+#define PCRE_MODE          0x0001  /* compiled in 8 bit mode */
+#endif
+#ifdef COMPILE_PCRE16
+#define PCRE_MODE          0x0002  /* compiled in 16 bit mode */
+#endif
+#define PCRE_FIRSTSET      0x0010  /* first_char is set */
+#define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
+#define PCRE_REQCHSET      0x0040  /* req_byte is set */
+#define PCRE_RCH_CASELESS  0x0080  /* caseless requested char */
+#define PCRE_STARTLINE     0x0100  /* start after \n for multiline */
+#define PCRE_NOPARTIAL     0x0200  /* can't use partial with this regex */
+#define PCRE_JCHANGED      0x0400  /* j option used in regex */
+#define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
 
 /* Flags for the "extra" block produced by pcre_study(). */
 
@@ -917,7 +923,7 @@ for) in a minority area (EBCDIC platforms), this is not sensible. Any
 application that did need both could compile two versions of the library, using
 macros to give the functions distinct names. */
 
-#ifndef SUPPORT_UTF8
+#ifndef SUPPORT_UTF
 
 /* UTF-8 support is not enabled; use the platform-dependent character literals
 so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
@@ -1186,7 +1192,7 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
 
-#else  /* SUPPORT_UTF8 */
+#else  /* SUPPORT_UTF */
 
 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
 works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@@ -1446,7 +1452,7 @@ only. */
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
 
 /* Escape items that are just an encoding of a particular data value. */
 
@@ -2249,7 +2255,7 @@ extern real_pcre        *PRIV(try_flipped)(const real_pcre *, real_pcre *,
 extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
 extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
                            int *, BOOL);
-extern BOOL              PRIV(xclass)(int, const pcre_uchar *);
+extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);
 
 #ifdef SUPPORT_JIT
 extern void              PRIV(jit_compile)(const real_pcre *, pcre_extra *);
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 8c6b206..50376b9 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -1311,7 +1311,7 @@ static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar*
 {
 /* Detects if the character and its othercase has only 1 bit difference. */
 unsigned int c, oc, bit;
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
 int n;
 #endif
 
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index d82af25..5b66bac 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -56,6 +56,8 @@ POSSIBILITY OF SUCH DAMAGE.
  Non-letter characters:
    \xc2\xa1 = 0xa1 =  (Inverted Exclamation Mark)
    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+   \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character: UTF-16 high surrogate)
+   \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character: UTF-16 low surrogate)
  Newlines:
    \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
@@ -99,13 +101,19 @@ int main(void)
 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
 #endif
 
-#define MUA     (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define MUAP    (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMUA    (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
-#define CMUAP   (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define MA      (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
-#define MAP     (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
-#define CMA     (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
+#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
+#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
+
+#define OFFSET_MASK	0xffff
+#define F_DIFF		0x010000
+#define F_FORCECONV	0x020000
+#define F_NO8		0x100000
+#define F_NO16		0x200000
 
 struct regression_test_case {
 	int flags;
@@ -521,7 +529,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
 	{ MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
 	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
-	{ MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
+	{ MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
 	{ MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
 	{ MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
 	{ MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
@@ -535,11 +543,11 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
 	{ MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
 	{ MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
-	{ MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
+	{ MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
-	{ MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
+	{ MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
 	{ MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
@@ -601,6 +609,20 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
 	{ MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
 
+	/* 16 bit specific tests. */
+	{ CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
+	{ CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+	{ CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
+	{ CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
+	{ CMA, 0 | F_FORCECONV | F_NO8, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
+	{ CMA, 0 | F_FORCECONV | F_NO8, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
+	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
+	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
+	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
+	{ CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
+	{ CMA, 0 | F_FORCECONV | F_NO8, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
+	{ CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
+
 	/* Deep recursion. */
 	{ MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
 	{ MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
@@ -721,7 +743,7 @@ static int regression_tests(void)
 	int ovector8_2[32];
 	int return_value8_1, return_value8_2;
 	int utf8 = 0, ucp8 = 0;
-	int disabled_flags8 = PCRE_BUG;
+	int disabled_flags8 = 0;
 #endif
 #ifdef SUPPORT_PCRE16
 	pcre *re16;
@@ -730,7 +752,7 @@ static int regression_tests(void)
 	int ovector16_2[32];
 	int return_value16_1, return_value16_2;
 	int utf16 = 0, ucp16 = 0;
-	int disabled_flags16 = PCRE_BUG;
+	int disabled_flags16 = 0;
 	int length16;
 #endif
 
@@ -765,9 +787,11 @@ static int regression_tests(void)
 
 		error = NULL;
 #ifdef SUPPORT_PCRE8
-		re8 = pcre_compile(current->pattern,
-			current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
-			&error, &err_offs, NULL);
+		re8 = NULL;
+		if (!(current->start_offset & F_NO8))
+			re8 = pcre_compile(current->pattern,
+				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+				&error, &err_offs, NULL);
 
 		extra8 = NULL;
 		if (re8) {
@@ -784,17 +808,20 @@ static int regression_tests(void)
 				pcre_free(re8);
 				re8 = NULL;
 			}
-		} else if (utf8 && ucp8)
+		} else if (utf8 && ucp8 && !(current->start_offset & F_NO8))
 			printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
 #endif
 #ifdef SUPPORT_PCRE16
-		if (current->flags & PCRE_UTF8)
+		if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
 			convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
 		else
 			copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
-		re16 = pcre16_compile(regtest_buf,
-			current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
-			&error, &err_offs, NULL);
+
+		re16 = NULL;
+		if (!(current->start_offset & F_NO16))
+			re16 = pcre16_compile(regtest_buf,
+				current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+				&error, &err_offs, NULL);
 
 		extra16 = NULL;
 		if (re16) {
@@ -811,7 +838,7 @@ static int regression_tests(void)
 				pcre_free(re16);
 				re16 = NULL;
 			}
-		} else if (utf16 && ucp16)
+		} else if (utf16 && ucp16 && !(current->start_offset & F_NO16))
 			printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
 #endif
 
@@ -822,16 +849,15 @@ static int regression_tests(void)
 #ifdef SUPPORT_PCRE8
 		return_value8_1 = -1000;
 		return_value8_2 = -1000;
+		for (i = 0; i < 32; ++i)
+			ovector8_1[i] = -2;
+		for (i = 0; i < 32; ++i)
+			ovector8_2[i] = -2;
 		if (re8) {
 			setstack(extra8, 0);
-			for (i = 0; i < 32; ++i)
-				ovector8_1[i] = -2;
-			return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+			return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
 				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
-
-			for (i = 0; i < 32; ++i)
-				ovector8_2[i] = -2;
-			return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+			return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
 				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
 		}
 #endif
@@ -839,32 +865,30 @@ static int regression_tests(void)
 #ifdef SUPPORT_PCRE16
 		return_value16_1 = -1000;
 		return_value16_2 = -1000;
+		for (i = 0; i < 32; ++i)
+			ovector16_1[i] = -2;
+		for (i = 0; i < 32; ++i)
+			ovector16_2[i] = -2;
 		if (re16) {
 			setstack(extra16, 0);
-			if (current->flags & PCRE_UTF8)
+			if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
 				length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
 			else
 				length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
-
-			for (i = 0; i < 32; ++i)
-				ovector16_1[i] = -2;
-			return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+			return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
 				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
-
-			for (i = 0; i < 32; ++i)
-				ovector16_2[i] = -2;
-			return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+			return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
 				current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
 		}
 #endif
 
-		/* If PCRE_BUG is set, just run the test, but do not compare the results.
+		/* If F_DIFF is set, just run the test, but do not compare the results.
 		Segfaults can still be captured. */
 
 		is_succesful = 1;
-		if (!(current->flags & PCRE_BUG)) {
+		if (!(current->start_offset & F_DIFF)) {
 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
-			if (utf8 == utf16) {
+			if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
 				/* All results must be the same. */
 				if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
 					printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
@@ -947,7 +971,7 @@ static int regression_tests(void)
 		}
 #endif
 
-		/* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
+		/* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
 		printf(".");
 		fflush(stdout);
 		current++;
@@ -962,5 +986,4 @@ static int regression_tests(void)
 	}
 }
 
-
 /* End of pcre_jit_test.c */
diff --git a/pcre_newline.c b/pcre_newline.c
index d618b80..2343f73 100644
--- a/pcre_newline.c
+++ b/pcre_newline.c
@@ -84,7 +84,7 @@ if (utf)
   GETCHAR(c, ptr);
   }
 else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   c = *ptr;
 
 if (type == NLTYPE_ANYCRLF) switch(c)
@@ -150,7 +150,7 @@ if (utf)
   GETCHAR(c, ptr);
   }
 else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   c = *ptr;
 
 if (type == NLTYPE_ANYCRLF) switch(c)
diff --git a/pcre_ord2utf8.c b/pcre_ord2utf8.c
index b374987..67cf529 100644
--- a/pcre_ord2utf8.c
+++ b/pcre_ord2utf8.c
@@ -65,7 +65,7 @@ Returns:     number of characters placed in the buffer
 int
 PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 
 register int i, j;
 
@@ -88,7 +88,7 @@ return i + 1;
 #else
 
 (void)(cvalue);  /* Keep compiler happy; this function won't ever be */
-(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer);  /* called when SUPPORT_UTF is not defined. */
 return 0;
 
 #endif
diff --git a/pcre_study.c b/pcre_study.c
index 3f25c3a..493108e 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -323,7 +323,7 @@ for (;;)
 
     /* Check a class for variable quantification */
 
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
     /* Fall through */
@@ -824,7 +824,7 @@ do
       case OP_SOM:
       case OP_THEN:
       case OP_THEN_ARG:
-#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       case OP_XCLASS:
 #endif
       return SSB_FAIL;
@@ -1325,6 +1325,16 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER)
   return NULL;
   }
 
+if ((re->flags & PCRE_MODE) == 0)
+  {
+#ifdef COMPILE_PCRE8
+  *errorptr = "argument is compiled in 16 bit mode";
+#else
+  *errorptr = "argument is compiled in 8 bit mode";
+#endif
+  return NULL;
+  }
+
 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
   {
   *errorptr = "unknown or incorrect option bit(s) set";
@@ -1346,9 +1356,16 @@ if ((re->options & PCRE_ANCHORED) == 0 &&
   /* Set the character tables in the block that is passed around */
 
   tables = re->tables;
+
+#ifdef COMPILE_PCRE8
   if (tables == NULL)
     (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
     (void *)(&tables));
+#else
+  if (tables == NULL)
+    (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+    (void *)(&tables));
+#endif
 
   compile_block.lcc = tables + lcc_offset;
   compile_block.fcc = tables + fcc_offset;
diff --git a/pcre_valid_utf8.c b/pcre_valid_utf8.c
index bbab87f..05d82f9 100644
--- a/pcre_valid_utf8.c
+++ b/pcre_valid_utf8.c
@@ -105,7 +105,7 @@ Returns:       = 0    if the string is a valid UTF-8 string
 int
 PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 register PCRE_PUCHAR p;
 
 if (length < 0)
@@ -288,7 +288,7 @@ for (p = string; length-- > 0; p++)
     }
   }
 
-#else  /* SUPPORT_UTF8 */
+#else  /* SUPPORT_UTF */
 (void)(string);  /* Keep picky compilers happy */
 (void)(length);
 #endif
diff --git a/pcre_xclass.c b/pcre_xclass.c
index cdb9d07..1c2b65a 100644
--- a/pcre_xclass.c
+++ b/pcre_xclass.c
@@ -64,11 +64,17 @@ Returns:      TRUE if character matches, else FALSE
 */
 
 BOOL
-PRIV(xclass)(int c, const pcre_uchar *data)
+PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
 {
 int t;
 BOOL negated = (*data & XCL_NOT) != 0;
 
+(void)utf;
+#ifdef COMPILE_PCRE8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
 /* Character values < 256 are matched against a bitmap, if one is present. If
 not, we still carry on, because there may be ranges that start below 256 in the
 additional data. */
@@ -91,13 +97,30 @@ while ((t = *data++) != XCL_END)
   int x, y;
   if (t == XCL_SINGLE)
     {
-    GETCHARINC(x, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      x = *data++;
     if (c == x) return !negated;
     }
   else if (t == XCL_RANGE)
     {
-    GETCHARINC(x, data);
-    GETCHARINC(y, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      {
+      x = *data++;
+      y = *data++;
+      }
     if (c >= x && c <= y) return !negated;
     }
 
diff --git a/pcreposix.c b/pcreposix.c
index 2dc1561..0426e2e 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -401,6 +401,7 @@ switch(rc)
   case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
   case PCRE_ERROR_BADUTF8: return REG_INVARG;
   case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
+  case PCRE_ERROR_BADMODE: return REG_INVARG;
   default: return REG_ASSERT;
   }
 }
author	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-10 02:20:06 +0000
committer	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-10 02:20:06 +0000
commit	669e6f0bbc3b07f6df3b0d0cafba3555e39e433c (patch)
tree	37c97c3fa732981cf8d2dbed54d27cca37fa8fac
parent	24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (diff)
download	pcre-669e6f0bbc3b07f6df3b0d0cafba3555e39e433c.tar.gz