summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-08 07:36:41 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-08 07:36:41 +0000
commit 24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (patch)
tree 3a50f360cdab48f89ac140f342640214a92e1449
parent 4b661f8c6abbe9be96af67b9d5547bb96359cc99 (diff)
download pcre-24054b0ee8c34e475c8ecc21938f7139f1ca6d2c.tar.gz
Add an --enable-utf option instead of --enable-utf16; --enable-utf8 is kept for compatibility reasons. Also fix other minor issues.
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@794 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--configure.ac115
-rw-r--r--pcre16_ord2utf16.c6
-rw-r--r--pcre16_utf16_utils.c6
-rw-r--r--pcre16_valid_utf16.c6
-rw-r--r--pcre_compile.c14
-rw-r--r--pcre_config.c4
-rw-r--r--pcre_dfa_exec.c18
-rw-r--r--pcre_exec.c10
-rw-r--r--pcre_internal.h59
-rw-r--r--pcre_jit_compile.c10
-rw-r--r--pcre_jit_test.c2
11 files changed, 104 insertions, 146 deletions
diff --git a/configure.ac b/configure.ac
index ff516c7..ef1524f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,7 +121,7 @@ AC_ARG_ENABLE(pcre16,
AC_ARG_ENABLE(cpp,
AS_HELP_STRING([--disable-cpp],
[disable C++ support]),
- , enable_cpp=yes)
+ , enable_cpp=unset)
AC_SUBST(enable_cpp)
# Handle --enable-jit (disabled by default)
@@ -145,19 +145,19 @@ AC_ARG_ENABLE(rebuild-chartables,
# Handle --enable-utf8 (disabled by default)
AC_ARG_ENABLE(utf8,
AS_HELP_STRING([--enable-utf8],
- [enable UTF-8 support (incompatible with --enable-ebcdic)]),
+ [another name for --enable-utf. Kept only for compatibility reasons]),
, enable_utf8=unset)
-# Handle --enable-utf16 (disabled by default)
-AC_ARG_ENABLE(utf16,
- AS_HELP_STRING([--enable-utf16],
- [enable UTF-16 support (incompatible with --enable-ebcdic)]),
- , enable_utf16=unset)
+# Handle --enable-utf (disabled by default)
+AC_ARG_ENABLE(utf,
+ AS_HELP_STRING([--enable-utf],
+ [enable UTF-8/16 support (incompatible with --enable-ebcdic)]),
+ , enable_utf=unset)
# Handle --enable-unicode-properties
AC_ARG_ENABLE(unicode-properties,
AS_HELP_STRING([--enable-unicode-properties],
- [enable Unicode properties support (implies --enable-utf8 and --enable-utf16)]),
+ [enable Unicode properties support (implies --enable-utf)]),
, enable_unicode_properties=no)
# Handle --enable-newline=NL
@@ -199,7 +199,7 @@ AC_ARG_ENABLE(bsr-anycrlf,
# Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic,
AS_HELP_STRING([--enable-ebcdic],
- [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
+ [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
, enable_ebcdic=no)
# Handle --disable-stack-for-recursion
@@ -263,24 +263,14 @@ AC_ARG_WITH(match-limit-recursion,
[default limit on internal recursion (default=MATCH_LIMIT)]),
, with_match_limit_recursion=MATCH_LIMIT)
-# Make sure that if enable_utf8 was set, that enable_pcre8 support is enabled
-if test "x$enable_utf8" = "xyes"
+# Copy enable_utf8 value to enable_utf for compatibility reasons
+if test "x$enable_utf8" != "xunset"
then
- if test "x$enable_pcre8" = "xno"
+ if test "x$enable_utf" != "xunset"
then
- AC_MSG_ERROR([support for UTF-8 requires pcre library with 8 bit characters])
+ AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
fi
- enable_pcre8=yes
-fi
-
-# Make sure that if enable_utf16 was set, that enable_pcre16 support is enabled
-if test "x$enable_utf16" = "xyes"
-then
- if test "x$enable_pcre16" = "xno"
- then
- AC_MSG_ERROR([support for UTF-16 requires pcre library with 16 bit characters])
- fi
- enable_pcre16=yes
+ enable_utf=$enable_utf8
fi
# Set the default value for pcre8
@@ -301,39 +291,26 @@ then
AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled])
fi
-# Make sure that if enable_unicode_properties was set, that UTF-8 or UTF-16
-# support enabled.
-#
+# Make sure that, if enable_unicode_properties was set, UTF support is enabled.
if test "x$enable_unicode_properties" = "xyes"
then
- if test "x$enable_utf8" = "xno"
- then
- AC_MSG_ERROR([support for Unicode properties requires UTF-8 support])
- fi
- if test "x$enable_utf16" = "xno"
- then
- AC_MSG_ERROR([support for Unicode properties requires UTF-16 support])
- fi
- if test "x$enable_pcre8" = "xyes"
- then
- enable_utf8=yes
- fi
- if test "x$enable_pcre16" = "xyes"
+ if test "x$enable_utf" = "xno"
then
- enable_utf16=yes
+ AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support])
fi
+ enable_utf=yes
fi
-# enable_utf8 is disabled by default.
-if test "x$enable_utf8" = "xunset"
+# enable_utf is disabled by default.
+if test "x$enable_utf" = "xunset"
then
- enable_utf8=no
+ enable_utf=no
fi
-# enable_utf16 is disabled by default.
-if test "x$enable_utf16" = "xunset"
+# enable_cpp copies the value of enable_pcre8 by default
+if test "x$enable_cpp" = "xunset"
then
- enable_utf16=no
+ enable_cpp=$enable_pcre8
fi
# Make sure that if enable_cpp was set, that enable_pcre8 support is enabled
@@ -346,20 +323,16 @@ then
fi
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled.
-# Also check that UTF-8 or UTF-16 support is not requested, because PCRE cannot
-# handle EBCDIC and UTF in the same build. To do so it would need to use different
+# Also check that UTF support is not requested, because PCRE cannot handle
+# EBCDIC and UTF in the same build. To do so it would need to use different
# character constants depending on the mode.
#
if test "x$enable_ebcdic" = "xyes"
then
enable_rebuild_chartables=yes
- if test "x$enable_utf8" = "xyes"
+ if test "x$enable_utf" = "xyes"
then
- AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time])
- fi
- if test "x$enable_utf16" = "xyes"
- then
- AC_MSG_ERROR([support for EBCDIC and UTF-16 cannot be enabled at the same time])
+ AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time])
fi
fi
@@ -502,8 +475,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
-AM_CONDITIONAL(WITH_UTF8, test "x$enable_utf8" = "xyes")
-AM_CONDITIONAL(WITH_UTF16, test "x$enable_utf16" = "xyes")
+AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
# Checks for typedefs, structures, and compiler characteristics.
@@ -594,20 +566,12 @@ if test "$enable_pcregrep_jit" = "yes"; then
Define to enable JIT support in pcregrep.])
fi
-if test "$enable_utf8" = "yes"; then
- AC_DEFINE([SUPPORT_UTF8], [], [
- Define to enable support for the UTF-8 Unicode encoding. This will
- work even in an EBCDIC environment, but it is incompatible with
- the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
- *or* ASCII/UTF-8, but not both at once.])
-fi
-
-if test "$enable_utf16" = "yes"; then
- AC_DEFINE([SUPPORT_UTF16], [], [
- Define to enable support for the UTF-16 Unicode encoding. This will
- work even in an EBCDIC environment, but it is incompatible with
- the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
- *or* ASCII/UTF-16, but not both at once.])
+if test "$enable_utf" = "yes"; then
+ AC_DEFINE([SUPPORT_UTF], [], [
+ Define to enable support for the UTF-8/16 Unicode encoding. This
+ will work even in an EBCDIC environment, but it is incompatible
+ with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
+ code *or* ASCII/UTF-8/16, but not both at once.])
fi
if test "$enable_unicode_properties" = "yes"; then
@@ -742,9 +706,9 @@ if test "$enable_ebcdic" = "yes"; then
character codes, define this macro as 1. On systems that can use
"configure", this can be done via --enable-ebcdic. PCRE will then
assume that all input strings are in EBCDIC. If you do not define
- this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode.
- It is not possible to build a version of PCRE that supports both
- EBCDIC and UTF-8.])
+ this macro, PCRE will assume input strings are ASCII or UTF-8/16
+ Unicode. It is not possible to build a version of PCRE that
+ supports both EBCDIC and UTF-8/16.])
fi
# Platform specific issues
@@ -869,8 +833,7 @@ $PACKAGE-$VERSION configuration summary:
Build 16 bit pcre library ....... : ${enable_pcre16}
Build C++ library ............... : ${enable_cpp}
Enable JIT compiling support .... : ${enable_jit}
- Enable UTF-8 support ............ : ${enable_utf8}
- Enable UTF-16 support ........... : ${enable_utf16}
+ Enable UTF-8/16 support ......... : ${enable_utf}
Unicode properties .............. : ${enable_unicode_properties}
Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
diff --git a/pcre16_ord2utf16.c b/pcre16_ord2utf16.c
index 67c4c5c..99bed29 100644
--- a/pcre16_ord2utf16.c
+++ b/pcre16_ord2utf16.c
@@ -67,7 +67,7 @@ Returns: number of characters placed in the buffer
int
PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
Should never happen in practice. */
@@ -85,11 +85,11 @@ cvalue -= 0x10000;
*buffer = 0xdc00 | (cvalue & 0x3ff);
return 2;
-#else
+#else /* SUPPORT_UTF */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
return 0;
-#endif
+#endif /* SUPPORT_UTF */
}
/* End of pcre16_ord2utf16.c */
diff --git a/pcre16_utf16_utils.c b/pcre16_utf16_utils.c
index 8f970bb..b927458 100644
--- a/pcre16_utf16_utils.c
+++ b/pcre16_utf16_utils.c
@@ -77,7 +77,7 @@ Returns: the number of characters placed into the output buffer,
int
pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
/* This function converts any UTF-16 string to host byte order and optionally removes
any Byte Order Marks (BOMS). Returns with the remainig length. */
BOOL same_bo = TRUE;
@@ -108,11 +108,11 @@ while (iptr < end)
*optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
}
-#else
+#else /* SUPPORT_UTF */
(void)(output); /* Keep picky compilers happy */
(void)(input);
(void)(keep_boms);
-#endif
+#endif /* SUPPORT_UTF */
return length;
}
diff --git a/pcre16_valid_utf16.c b/pcre16_valid_utf16.c
index cc3e50e..b8ec699 100644
--- a/pcre16_valid_utf16.c
+++ b/pcre16_valid_utf16.c
@@ -83,7 +83,7 @@ Returns: = 0 if the string is a valid UTF-16 string
int
PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
{
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
register PCRE_PUCHAR p;
register pcre_uchar c;
@@ -135,10 +135,10 @@ for (p = string; length-- > 0; p++)
}
}
-#else /* SUPPORT_UTF16 */
+#else /* SUPPORT_UTF */
(void)(string); /* Keep picky compilers happy */
(void)(length);
-#endif
+#endif /* SUPPORT_UTF */
return PCRE_UTF16_ERR0; /* This indicates success */
}
diff --git a/pcre_compile.c b/pcre_compile.c
index 223e475..24a7b1c 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4607,12 +4607,7 @@ for (;; ptr++)
it's a length rather than a small character. */
#ifdef SUPPORT_UTF
-#ifdef COMPILE_PCRE8
- if (utf && (code[-1] & 0x80) != 0)
-#endif /* COMPILE_PCRE8 */
-#ifdef COMPILE_PCRE16
- if (utf && (code[-1] & 0xfc00) == 0xdc00)
-#endif /* COMPILE_PCRE8 */
+ if (utf && NOT_FIRSTCHAR(code[-1]))
{
pcre_uchar *lastchar = code - 1;
BACKCHAR(lastchar);
@@ -4625,7 +4620,6 @@ for (;; ptr++)
/* Handle the case of a single charater - either with no UTF support, or
with UTF disabled, or for a single character UTF character. */
-
{
c = code[-1];
if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
@@ -7438,8 +7432,14 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
int newnl = 0;
int newbsr = 0;
+#ifdef COMPILE_PCRE8
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
{ skipatstart += 7; options |= PCRE_UTF8; continue; }
+#endif
+#ifdef COMPILE_PCRE16
+ if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+ { skipatstart += 8; options |= PCRE_UTF16; continue; }
+#endif
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
{ skipatstart += 6; options |= PCRE_UCP; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
diff --git a/pcre_config.c b/pcre_config.c
index a7792f7..92c9973 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -73,7 +73,7 @@ pcre16_config(int what, void *where)
switch (what)
{
case PCRE_CONFIG_UTF8:
-#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
*((int *)where) = 1;
#else
*((int *)where) = 0;
@@ -81,7 +81,7 @@ switch (what)
break;
case PCRE_CONFIG_UTF16:
-#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
+#if defined SUPPORT_UTF && defined COMPILE_PCRE16
*((int *)where) = 1;
#else
*((int *)where) = 0;
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 7cceaae..a5bc745 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -2683,7 +2683,7 @@ for (;;)
const pcre_uchar *p = start_subject + local_offsets[rc];
const pcre_uchar *pp = start_subject + local_offsets[rc+1];
int charcount = local_offsets[rc+1] - local_offsets[rc];
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
if (charcount > 0)
{
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
@@ -2780,7 +2780,7 @@ for (;;)
const pcre_uchar *p = ptr;
const pcre_uchar *pp = local_ptr;
charcount = pp - p;
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}
}
@@ -2862,7 +2862,7 @@ for (;;)
{
const pcre_uchar *p = start_subject + local_offsets[0];
const pcre_uchar *pp = start_subject + local_offsets[1];
- while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
+ while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
@@ -3144,7 +3144,7 @@ else
/* Check a UTF-8 string if required. Unfortunately there's no way of passing
back the character offset. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{
int erroroffset;
@@ -3159,17 +3159,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
}
-#ifdef COMPILE_PCRE8
- if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
- return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
+ NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
}
#endif
diff --git a/pcre_exec.c b/pcre_exec.c
index 676f4b8..c5932f7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6038,17 +6038,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
}
/* Check that a start_offset points to the start of a UTF character. */
-#ifdef COMPILE_PCRE8
if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
+ NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;
-#else
-#ifdef COMPILE_PCRE16
- if (start_offset > 0 && start_offset < length &&
- (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00)
- return PCRE_ERROR_BADUTF8_OFFSET;
-#endif /* COMPILE_PCRE16 */
-#endif /* COMPILE_PCRE8 */
}
#endif
diff --git a/pcre_internal.h b/pcre_internal.h
index 624e07c..e748809 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -57,33 +57,32 @@ the PRIV macro. */
#define COMPILE_PCRE8
#endif
-/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
+/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The
+"configure" script ensures this, but not everybody uses "configure". */
-#if defined EBCDIC && (defined SUPPORT_UTF8 || defined SUPPORT_UTF16)
-#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#if defined SUPPORT_UCP && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif
-/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility
+reasons with existing code. */
-#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
-#define SUPPORT_UTF8 1
+#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
#endif
-/* If SUPPORT_UCP is defined, SUPPORT_UTF16 must also be defined. The
-"configure" script ensures this, but not everybody uses "configure". */
+/* FIXME: SUPPORT_UTF8 should eventually disappear from the code.
+Until then we define it if SUPPORT_UTF is defined. */
-#if defined SUPPORT_UCP && defined COMPILE_PCRE16 && !defined SUPPORT_UTF16
-#define SUPPORT_UTF16 1
+#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8)
+#define SUPPORT_UTF8 1
#endif
-/* This macro is defined if either UTF-8 or UTF-16 support or both are
-enabled. */
+/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
-#if defined SUPPORT_UTF8 || defined SUPPORT_UTF16
-/* Unicode Transformation Format is enabled. */
-#define SUPPORT_UTF 1
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
#endif
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -524,16 +523,18 @@ capturing parenthesis numbers in back references. */
#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE
-/* When UTF-8 encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-never be called in byte mode. To make sure they can never even appear when
-UTF-8 support is omitted, we don't even define them. */
+/* When UTF encoding is being used, a character is no longer just a single
+code unit. The macros for character handling generate simple sequences when
+used in character-mode, and more complicated ones for UTF characters.
+GETCHARLENTEST and other macros are not used when UTF is not supported,
+so, to make sure they can never even appear when UTF support is omitted,
+we don't even define them. */
+
+#ifndef SUPPORT_UTF
/* #define HAS_EXTRALEN(c) */
/* #define GET_EXTRALEN(c) */
-#ifndef SUPPORT_UTF
+/* #define NOT_FIRSTCHAR(c) */
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@@ -562,6 +563,11 @@ Otherwise it has an undefined behaviour. */
#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
/* Base macro to pick up the remaining bytes of a UTF-8 character, not
advancing the pointer. */
@@ -724,6 +730,11 @@ Otherwise it has an undefined behaviour. */
#define GET_EXTRALEN(c) 1
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
/* Base macro to pick up the low surrogate of a UTF-16 character, not
advancing the pointer. */
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 3b85b85..8c6b206 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -1240,7 +1240,7 @@ OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_T
OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
/* Copy the integer value to the output buffer */
#ifdef COMPILE_PCRE16
-OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ASHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
@@ -1353,7 +1353,7 @@ if (!ispowerof2(bit))
#ifdef COMPILE_PCRE8
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf && c > 127)
{
n = GET_EXTRALEN(*cc);
@@ -1364,13 +1364,13 @@ if (common->utf && c > 127)
}
return (n << 8) | bit;
}
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
return (0 << 8) | bit;
#else /* COMPILE_PCRE8 */
#ifdef COMPILE_PCRE16
-#ifdef SUPPORT_UTF16
+#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
{
if (bit >= (1 << 10))
@@ -1378,7 +1378,7 @@ if (common->utf && c > 65535)
else
return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
}
-#endif /* SUPPORT_UTF16 */
+#endif /* SUPPORT_UTF */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
#endif /* COMPILE_PCRE16 */
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 8aee260..d82af25 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -616,7 +616,7 @@ static struct regression_test_case regression_test_cases[] = {
{ 0, 0, NULL, NULL }
};
-pcre_jit_stack* callback(void *arg)
+static pcre_jit_stack* callback(void *arg)
{
return (pcre_jit_stack *)arg;
}