Add the --enable-utf option in place of --enable-utf16; --enable-utf8 is kept for compatibility reasons. Also fix other minor issues.

git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@794 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-08 07:36:41 +0000
committer: zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-08 07:36:41 +0000
commit: 24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (patch)
tree: 3a50f360cdab48f89ac140f342640214a92e1449
parent: 4b661f8c6abbe9be96af67b9d5547bb96359cc99 (diff)
download: pcre-24054b0ee8c34e475c8ecc21938f7139f1ca6d2c.tar.gz
11 files changed, 104 insertions, 146 deletions
diff --git a/configure.ac b/configure.ac
index ff516c7..ef1524f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,7 +121,7 @@ AC_ARG_ENABLE(pcre16,
 AC_ARG_ENABLE(cpp,
               AS_HELP_STRING([--disable-cpp],
                              [disable C++ support]),
-              , enable_cpp=yes)
+              , enable_cpp=unset)
 AC_SUBST(enable_cpp)
 
 # Handle --enable-jit (disabled by default)
@@ -145,19 +145,19 @@ AC_ARG_ENABLE(rebuild-chartables,
 # Handle --enable-utf8 (disabled by default)
 AC_ARG_ENABLE(utf8,
               AS_HELP_STRING([--enable-utf8],
-                             [enable UTF-8 support (incompatible with --enable-ebcdic)]),
+                             [another name for --enable-utf. Kept only for compatibility reasons]),
               , enable_utf8=unset)
 
-# Handle --enable-utf16 (disabled by default)
-AC_ARG_ENABLE(utf16,
-              AS_HELP_STRING([--enable-utf16],
-                             [enable UTF-16 support (incompatible with --enable-ebcdic)]),
-              , enable_utf16=unset)
+# Handle --enable-utf (disabled by default)
+AC_ARG_ENABLE(utf,
+              AS_HELP_STRING([--enable-utf],
+                             [enable UTF-8/16 support (incompatible with --enable-ebcdic)]),
+              , enable_utf=unset)
 
 # Handle --enable-unicode-properties
 AC_ARG_ENABLE(unicode-properties,
               AS_HELP_STRING([--enable-unicode-properties],
-                             [enable Unicode properties support (implies --enable-utf8 and --enable-utf16)]),
+                             [enable Unicode properties support (implies --enable-utf)]),
               , enable_unicode_properties=no)
 
 # Handle --enable-newline=NL
@@ -199,7 +199,7 @@ AC_ARG_ENABLE(bsr-anycrlf,
 # Handle --enable-ebcdic
 AC_ARG_ENABLE(ebcdic,
               AS_HELP_STRING([--enable-ebcdic],
-                             [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
+                             [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
               , enable_ebcdic=no)
 
 # Handle --disable-stack-for-recursion
@@ -263,24 +263,14 @@ AC_ARG_WITH(match-limit-recursion,
                            [default limit on internal recursion (default=MATCH_LIMIT)]),
             , with_match_limit_recursion=MATCH_LIMIT)
 
-# Make sure that if enable_utf8 was set, that enable_pcre8 support is enabled
-if test "x$enable_utf8" = "xyes"
+# Copy enable_utf8 value to enable_utf for compatibility reasons
+if test "x$enable_utf8" != "xunset"
 then
-  if test "x$enable_pcre8" = "xno"
+  if test "x$enable_utf" != "xunset"
   then
-    AC_MSG_ERROR([support for UTF-8 requires pcre library with 8 bit characters])
+    AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
   fi
-  enable_pcre8=yes
-fi
-
-# Make sure that if enable_utf16 was set, that enable_pcre16 support is enabled
-if test "x$enable_utf16" = "xyes"
-then
-  if test "x$enable_pcre16" = "xno"
-  then
-    AC_MSG_ERROR([support for UTF-16 requires pcre library with 16 bit characters])
-  fi
-  enable_pcre16=yes
+  enable_utf=$enable_utf8
 fi
 
 # Set the default value for pcre8
@@ -301,39 +291,26 @@ then
   AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled])
 fi
 
-# Make sure that if enable_unicode_properties was set, that UTF-8 or UTF-16
-# support enabled.
-#
+# Make sure that if enable_unicode_properties was set, that UTF support is enabled.
 if test "x$enable_unicode_properties" = "xyes"
 then
-  if test "x$enable_utf8" = "xno"
-  then
-    AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
-  fi
-  if test "x$enable_utf16" = "xno"
-  then
-    AC_MSG_ERROR([support for Unicode properties requires UTF-16 support])
-  fi
-  if test "x$enable_pcre8" = "xyes"
-  then
-    enable_utf8=yes
-  fi
-  if test "x$enable_pcre16" = "xyes"
+  if test "x$enable_utf" = "xno"
   then
-    enable_utf16=yes
+    AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support])
   fi
+  enable_utf=yes
 fi
 
-# enable_utf8 is disabled by default.
-if test "x$enable_utf8" = "xunset"
+# enable_utf is disabled by default.
+if test "x$enable_utf" = "xunset"
 then
-  enable_utf8=no
+  enable_utf=no
 fi
 
-# enable_utf16 is disabled by default.
-if test "x$enable_utf16" = "xunset"
+# enable_cpp copies the value of enable_pcre8 by default
+if test "x$enable_cpp" = "xunset"
 then
-  enable_utf16=no
+  enable_cpp=$enable_pcre8
 fi
 
 # Make sure that if enable_cpp was set, that enable_pcre8 support is enabled
@@ -346,20 +323,16 @@ then
 fi
 
 # Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
-# Also check that UTF-8 or UTF-16 support is not requested, because PCRE cannot
-# handle EBCDIC and UTF in the same build. To do so it would need to use different
+# Also check that UTF support is not requested, because PCRE cannot handle
+# EBCDIC and UTF in the same build. To do so it would need to use different
 # character constants depending on the mode.
 #
 if test "x$enable_ebcdic" = "xyes"
 then
   enable_rebuild_chartables=yes
-  if test "x$enable_utf8" = "xyes"
+  if test "x$enable_utf" = "xyes"
   then
-    AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
-  fi
-  if test "x$enable_utf16" = "xyes"
-  then
-    AC_MSG_ERROR([support for EBCDIC and UTF-16 cannot be enabled at the same time])
+    AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time])
   fi
 fi
 
@@ -502,8 +475,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
 AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
 AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
 AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
-AM_CONDITIONAL(WITH_UTF8, test "x$enable_utf8" = "xyes")
-AM_CONDITIONAL(WITH_UTF16, test "x$enable_utf16" = "xyes")
+AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
 
 # Checks for typedefs, structures, and compiler characteristics.
 
@@ -594,20 +566,12 @@ if test "$enable_pcregrep_jit" = "yes"; then
     Define to enable JIT support in pcregrep.])
 fi
 
-if test "$enable_utf8" = "yes"; then
-  AC_DEFINE([SUPPORT_UTF8], [], [
-    Define to enable support for the UTF-8 Unicode encoding. This will
-    work even in an EBCDIC environment, but it is incompatible with
-    the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
-    *or* ASCII/UTF-8, but not both at once.])
-fi
-
-if test "$enable_utf16" = "yes"; then
-  AC_DEFINE([SUPPORT_UTF16], [], [
-    Define to enable support for the UTF-16 Unicode encoding. This will
-    work even in an EBCDIC environment, but it is incompatible with
-    the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
-    *or* ASCII/UTF-16, but not both at once.])
+if test "$enable_utf" = "yes"; then
+  AC_DEFINE([SUPPORT_UTF], [], [
+    Define to enable support for the UTF-8/16 Unicode encoding. This
+    will work even in an EBCDIC environment, but it is incompatible
+    with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
+    code *or* ASCII/UTF-8/16, but not both at once.])
 fi
 
 if test "$enable_unicode_properties" = "yes"; then
@@ -742,9 +706,9 @@ if test "$enable_ebcdic" = "yes"; then
     character codes, define this macro as 1. On systems that can use
     "configure", this can be done via --enable-ebcdic. PCRE will then
     assume that all input strings are in EBCDIC. If you do not define
-    this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
-    It is not possible to build a version of PCRE that supports both
-    EBCDIC and UTF-8.])
+    this macro, PCRE will assume input strings are ASCII or UTF-8/16
+    Unicode. It is not possible to build a version of PCRE that
+    supports both EBCDIC and UTF-8/16.])
 fi
 
 # Platform specific issues
@@ -869,8 +833,7 @@ $PACKAGE-$VERSION configuration summary:
     Build 16 bit pcre library ....... : ${enable_pcre16}
     Build C++ library ............... : ${enable_cpp}
     Enable JIT compiling support .... : ${enable_jit}
-    Enable UTF-8 support ............ : ${enable_utf8}
-    Enable UTF-16 support ........... : ${enable_utf16}
+    Enable UTF-8/16 support ......... : ${enable_utf}
     Unicode properties .............. : ${enable_unicode_properties}
     Newline char/sequence ........... : ${enable_newline}
     \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
diff --git a/pcre16_ord2utf16.c b/pcre16_ord2utf16.c
index 67c4c5c..99bed29 100644
--- a/pcre16_ord2utf16.c
+++ b/pcre16_ord2utf16.c
@@ -67,7 +67,7 @@ Returns:     number of characters placed in the buffer
 int
 PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 
 /* Checking invalid cvalue character, encoded as invalid UTF-16 character.
 Should never happen in practice. */
@@ -85,11 +85,11 @@ cvalue -= 0x10000;
 *buffer = 0xdc00 | (cvalue & 0x3ff);
 return 2;
 
-#else
+#else /* SUPPORT_UTF */
 (void)(cvalue);  /* Keep compiler happy; this function won't ever be */
 (void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
 return 0;
-#endif
+#endif /* SUPPORT_UTF */
 }
 
 /* End of pcre16_ord2utf16.c */
diff --git a/pcre16_utf16_utils.c b/pcre16_utf16_utils.c
index 8f970bb..b927458 100644
--- a/pcre16_utf16_utils.c
+++ b/pcre16_utf16_utils.c
@@ -77,7 +77,7 @@ Returns:     the number of characters placed into the output buffer,
 int
 pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 /* This function converts any UTF-16 string to host byte order and optionally removes
 any Byte Order Marks (BOMS). Returns with the remainig length. */
 BOOL same_bo = TRUE;
@@ -108,11 +108,11 @@ while (iptr < end)
     *optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
   }
 
-#else
+#else /* SUPPORT_UTF */
 (void)(output);  /* Keep picky compilers happy */
 (void)(input);
 (void)(keep_boms);
-#endif
+#endif /* SUPPORT_UTF */
 return length;
 }
 
diff --git a/pcre16_valid_utf16.c b/pcre16_valid_utf16.c
index cc3e50e..b8ec699 100644
--- a/pcre16_valid_utf16.c
+++ b/pcre16_valid_utf16.c
@@ -83,7 +83,7 @@ Returns:       = 0    if the string is a valid UTF-16 string
 int
 PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 register PCRE_PUCHAR p;
 register pcre_uchar c;
 
@@ -135,10 +135,10 @@ for (p = string; length-- > 0; p++)
     }
   }
 
-#else  /* SUPPORT_UTF16 */
+#else  /* SUPPORT_UTF */
 (void)(string);  /* Keep picky compilers happy */
 (void)(length);
-#endif
+#endif /* SUPPORT_UTF */
 
 return PCRE_UTF16_ERR0;   /* This indicates success */
 }
diff --git a/pcre_compile.c b/pcre_compile.c
index 223e475..24a7b1c 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4607,12 +4607,7 @@ for (;; ptr++)
       it's a length rather than a small character. */
 
 #ifdef SUPPORT_UTF
-#ifdef COMPILE_PCRE8
-      if (utf && (code[-1] & 0x80) != 0)
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
-      if (utf && (code[-1] & 0xfc00) == 0xdc00)
-#endif /* COMPILE_PCRE8 */
+      if (utf && NOT_FIRSTCHAR(code[-1]))
         {
         pcre_uchar *lastchar = code - 1;
         BACKCHAR(lastchar);
@@ -4625,7 +4620,6 @@ for (;; ptr++)
 
       /* Handle the case of a single charater - either with no UTF support, or
       with UTF disabled, or for a single character UTF character. */
-
         {
         c = code[-1];
         if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
@@ -7438,8 +7432,14 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
   int newnl = 0;
   int newbsr = 0;
 
+#ifdef COMPILE_PCRE8
   if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
+#endif
+#ifdef COMPILE_PCRE16
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+    { skipatstart += 8; options |= PCRE_UTF16; continue; }
+#endif
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
     { skipatstart += 6; options |= PCRE_UCP; continue; }
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
diff --git a/pcre_config.c b/pcre_config.c
index a7792f7..92c9973 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -73,7 +73,7 @@ pcre16_config(int what, void *where)
 switch (what)
   {
   case PCRE_CONFIG_UTF8:
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;
@@ -81,7 +81,7 @@ switch (what)
   break;
 
   case PCRE_CONFIG_UTF16:
-#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
   *((int *)where) = 1;
 #else
   *((int *)where) = 0;
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 7cceaae..a5bc745 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -2683,7 +2683,7 @@ for (;;)
             const pcre_uchar *p = start_subject + local_offsets[rc];
             const pcre_uchar *pp = start_subject + local_offsets[rc+1];
             int charcount = local_offsets[rc+1] - local_offsets[rc];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             if (charcount > 0)
               {
               ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
@@ -2780,7 +2780,7 @@ for (;;)
             const pcre_uchar *p = ptr;
             const pcre_uchar *pp = local_ptr;
             charcount = pp - p;
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             }
           }
@@ -2862,7 +2862,7 @@ for (;;)
             {
             const pcre_uchar *p = start_subject + local_offsets[0];
             const pcre_uchar *pp = start_subject + local_offsets[1];
-            while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+            while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
             ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
             if (repeat_state_offset >= 0)
               { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
@@ -3144,7 +3144,7 @@ else
 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
 back the character offset. */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   {
   int erroroffset;
@@ -3159,17 +3159,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
     return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
     }
-#ifdef COMPILE_PCRE8
-  if (start_offset > 0 && start_offset < length &&
-        (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
-    return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
   if (start_offset > 0 && start_offset < length &&
-        (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
+        NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
   }
 #endif
 
diff --git a/pcre_exec.c b/pcre_exec.c
index 676f4b8..c5932f7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6038,17 +6038,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
     }
 
   /* Check that a start_offset points to the start of a UTF character. */
-#ifdef COMPILE_PCRE8
   if (start_offset > 0 && start_offset < length &&
-      (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
-  if (start_offset > 0 && start_offset < length &&
-      (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
-    return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
   }
 #endif
 
diff --git a/pcre_internal.h b/pcre_internal.h
index 624e07c..e748809 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -57,33 +57,32 @@ the PRIV macro. */
 #define COMPILE_PCRE8
 #endif
 
-/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
+/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The
+"configure" script ensures this, but not everybody uses "configure". */
 
-#if defined EBCDIC && (defined SUPPORT_UTF8 || defined SUPPORT_UTF16)
-#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#if defined SUPPORT_UCP && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
 #endif
 
-/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility
+reasons with existing code. */
 
-#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
-#define SUPPORT_UTF8 1
+#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
 #endif
 
-/* If SUPPORT_UCP is defined, SUPPORT_UTF16 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* Fixme: SUPPORT_UTF8 should eventually disappear from the code.
+Until then we define it if SUPPORT_UTF is defined. */
 
-#if defined SUPPORT_UCP && defined COMPILE_PCRE16 && !defined SUPPORT_UTF16
-#define SUPPORT_UTF16 1
+#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8)
+#define SUPPORT_UTF8 1
 #endif
 
-/* This macro is defined if either UTF-8 or UTF-16 support or both are
-enabled. */
+/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
 
-#if defined SUPPORT_UTF8 || defined SUPPORT_UTF16
-/* Unicode Transformation Format is enabled. */
-#define SUPPORT_UTF 1
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
 #endif
 
 /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -524,16 +523,18 @@ capturing parenthesis numbers in back references. */
 
 #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
 
-/* When UTF-8 encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-never be called in byte mode. To make sure they can never even appear when
-UTF-8 support is omitted, we don't even define them. */
+/* When UTF encoding is being used, a character is no longer just a single
+code unit. The macros for character handling generate simple sequences when
+used in character-mode, and more complicated ones for UTF characters.
+GETCHARLENTEST and other macros are not used when UTF is not supported,
+so they are not defined. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
+
+#ifndef SUPPORT_UTF
 
 /* #define HAS_EXTRALEN(c) */
 /* #define GET_EXTRALEN(c) */
-#ifndef SUPPORT_UTF
+/* #define NOT_FIRSTCHAR(c) */
 #define GETCHAR(c, eptr) c = *eptr;
 #define GETCHARTEST(c, eptr) c = *eptr;
 #define GETCHARINC(c, eptr) c = *eptr++;
@@ -562,6 +563,11 @@ Otherwise it has an undefined behaviour. */
 
 #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
 
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer. */
 
@@ -724,6 +730,11 @@ Otherwise it has an undefined behaviour. */
 
 #define GET_EXTRALEN(c) 1
 
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
 /* Base macro to pick up the low surrogate of a UTF-16 character, not
 advancing the pointer. */
 
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 3b85b85..8c6b206 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -1240,7 +1240,7 @@ OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_T
 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
 /* Copy the integer value to the output buffer */
 #ifdef COMPILE_PCRE16
-OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ASHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
 #endif
 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
@@ -1353,7 +1353,7 @@ if (!ispowerof2(bit))
 
 #ifdef COMPILE_PCRE8
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 if (common->utf && c > 127)
   {
   n = GET_EXTRALEN(*cc);
@@ -1364,13 +1364,13 @@ if (common->utf && c > 127)
     }
   return (n << 8) | bit;
   }
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 return (0 << 8) | bit;
 
 #else /* COMPILE_PCRE8 */
 
 #ifdef COMPILE_PCRE16
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
 if (common->utf && c > 65535)
   {
   if (bit >= (1 << 10))
@@ -1378,7 +1378,7 @@ if (common->utf && c > 65535)
   else
     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
   }
-#endif /* SUPPORT_UTF16 */
+#endif /* SUPPORT_UTF */
 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
 #endif /* COMPILE_PCRE16 */
 
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 8aee260..d82af25 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -616,7 +616,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ 0, 0, NULL, NULL }
 };
 
-pcre_jit_stack* callback(void *arg)
+static pcre_jit_stack* callback(void *arg)
 {
 	return (pcre_jit_stack *)arg;
 }
author	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-08 07:36:41 +0000
committer	zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2011-12-08 07:36:41 +0000
commit	24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (patch)
tree	3a50f360cdab48f89ac140f342640214a92e1449
parent	4b661f8c6abbe9be96af67b9d5547bb96359cc99 (diff)
download	pcre-24054b0ee8c34e475c8ecc21938f7139f1ca6d2c.tar.gz