diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-08 07:36:41 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-08 07:36:41 +0000 |
commit | 24054b0ee8c34e475c8ecc21938f7139f1ca6d2c (patch) | |
tree | 3a50f360cdab48f89ac140f342640214a92e1449 | |
parent | 4b661f8c6abbe9be96af67b9d5547bb96359cc99 (diff) | |
download | pcre-24054b0ee8c34e475c8ecc21938f7139f1ca6d2c.tar.gz |
Add the --enable-utf option in place of --enable-utf16; --enable-utf8 is kept for compatibility reasons. Also fix other minor issues.
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@794 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | configure.ac | 115 | ||||
-rw-r--r-- | pcre16_ord2utf16.c | 6 | ||||
-rw-r--r-- | pcre16_utf16_utils.c | 6 | ||||
-rw-r--r-- | pcre16_valid_utf16.c | 6 | ||||
-rw-r--r-- | pcre_compile.c | 14 | ||||
-rw-r--r-- | pcre_config.c | 4 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 18 | ||||
-rw-r--r-- | pcre_exec.c | 10 | ||||
-rw-r--r-- | pcre_internal.h | 59 | ||||
-rw-r--r-- | pcre_jit_compile.c | 10 | ||||
-rw-r--r-- | pcre_jit_test.c | 2 |
11 files changed, 104 insertions, 146 deletions
diff --git a/configure.ac b/configure.ac index ff516c7..ef1524f 100644 --- a/configure.ac +++ b/configure.ac @@ -121,7 +121,7 @@ AC_ARG_ENABLE(pcre16, AC_ARG_ENABLE(cpp, AS_HELP_STRING([--disable-cpp], [disable C++ support]), - , enable_cpp=yes) + , enable_cpp=unset) AC_SUBST(enable_cpp) # Handle --enable-jit (disabled by default) @@ -145,19 +145,19 @@ AC_ARG_ENABLE(rebuild-chartables, # Handle --enable-utf8 (disabled by default) AC_ARG_ENABLE(utf8, AS_HELP_STRING([--enable-utf8], - [enable UTF-8 support (incompatible with --enable-ebcdic)]), + [another name for --enable-utf. Kept only for compatibility reasons]), , enable_utf8=unset) -# Handle --enable-utf16 (disabled by default) -AC_ARG_ENABLE(utf16, - AS_HELP_STRING([--enable-utf16], - [enable UTF-16 support (incompatible with --enable-ebcdic)]), - , enable_utf16=unset) +# Handle --enable-utf (disabled by default) +AC_ARG_ENABLE(utf, + AS_HELP_STRING([--enable-utf], + [enable UTF-8/16 support (incompatible with --enable-ebcdic)]), + , enable_utf=unset) # Handle --enable-unicode-properties AC_ARG_ENABLE(unicode-properties, AS_HELP_STRING([--enable-unicode-properties], - [enable Unicode properties support (implies --enable-utf8 and --enable-utf16)]), + [enable Unicode properties support (implies --enable-utf)]), , enable_unicode_properties=no) # Handle --enable-newline=NL @@ -199,7 +199,7 @@ AC_ARG_ENABLE(bsr-anycrlf, # Handle --enable-ebcdic AC_ARG_ENABLE(ebcdic, AS_HELP_STRING([--enable-ebcdic], - [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), + [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), , enable_ebcdic=no) # Handle --disable-stack-for-recursion @@ -263,24 +263,14 @@ AC_ARG_WITH(match-limit-recursion, [default limit on internal recursion (default=MATCH_LIMIT)]), , 
with_match_limit_recursion=MATCH_LIMIT) -# Make sure that if enable_utf8 was set, that enable_pcre8 support is enabled -if test "x$enable_utf8" = "xyes" +# Copy enable_utf8 value to enable_utf for compatibility reasons +if test "x$enable_utf8" != "xunset" then - if test "x$enable_pcre8" = "xno" + if test "x$enable_utf" != "xunset" then - AC_MSG_ERROR([support for UTF-8 requires pcre library with 8 bit characters]) + AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.]) fi - enable_pcre8=yes -fi - -# Make sure that if enable_utf16 was set, that enable_pcre16 support is enabled -if test "x$enable_utf16" = "xyes" -then - if test "x$enable_pcre16" = "xno" - then - AC_MSG_ERROR([support for UTF-16 requires pcre library with 16 bit characters]) - fi - enable_pcre16=yes + enable_utf=$enable_utf8 fi # Set the default value for pcre8 @@ -301,39 +291,26 @@ then AC_MSG_ERROR([Either 8 or 16 bit (or both) pcre library must be enabled]) fi -# Make sure that if enable_unicode_properties was set, that UTF-8 or UTF-16 -# support enabled. -# +# Make sure that if enable_unicode_properties was set, that UTF support is enabled. if test "x$enable_unicode_properties" = "xyes" then - if test "x$enable_utf8" = "xno" - then - AC_MSG_ERROR([support for Unicode properties requires UTF-8 support]) - fi - if test "x$enable_utf16" = "xno" - then - AC_MSG_ERROR([support for Unicode properties requires UTF-16 support]) - fi - if test "x$enable_pcre8" = "xyes" - then - enable_utf8=yes - fi - if test "x$enable_pcre16" = "xyes" + if test "x$enable_utf" = "xno" then - enable_utf16=yes + AC_MSG_ERROR([support for Unicode properties requires UTF-8/16 support]) fi + enable_utf=yes fi -# enable_utf8 is disabled by default. -if test "x$enable_utf8" = "xunset" +# enable_utf is disabled by default. 
+if test "x$enable_utf" = "xunset" then - enable_utf8=no + enable_utf=no fi -# enable_utf16 is disabled by default. -if test "x$enable_utf16" = "xunset" +# enable_cpp copies the value of enable_pcre8 by default +if test "x$enable_cpp" = "xunset" then - enable_utf16=no + enable_cpp=$enable_pcre8 fi # Make sure that if enable_cpp was set, that enable_pcre8 support is enabled @@ -346,20 +323,16 @@ then fi # Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. -# Also check that UTF-8 or UTF-16 support is not requested, because PCRE cannot -# handle EBCDIC and UTF in the same build. To do so it would need to use different +# Also check that UTF support is not requested, because PCRE cannot handle +# EBCDIC and UTF in the same build. To do so it would need to use different # character constants depending on the mode. # if test "x$enable_ebcdic" = "xyes" then enable_rebuild_chartables=yes - if test "x$enable_utf8" = "xyes" + if test "x$enable_utf" = "xyes" then - AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time]) - fi - if test "x$enable_utf16" = "xyes" - then - AC_MSG_ERROR([support for EBCDIC and UTF-16 cannot be enabled at the same time]) + AC_MSG_ERROR([support for EBCDIC and UTF-8/16 cannot be enabled at the same time]) fi fi @@ -502,8 +475,7 @@ AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes") AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes") AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes") -AM_CONDITIONAL(WITH_UTF8, test "x$enable_utf8" = "xyes") -AM_CONDITIONAL(WITH_UTF16, test "x$enable_utf16" = "xyes") +AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes") # Checks for typedefs, structures, and compiler characteristics. 
@@ -594,20 +566,12 @@ if test "$enable_pcregrep_jit" = "yes"; then Define to enable JIT support in pcregrep.]) fi -if test "$enable_utf8" = "yes"; then - AC_DEFINE([SUPPORT_UTF8], [], [ - Define to enable support for the UTF-8 Unicode encoding. This will - work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code - *or* ASCII/UTF-8, but not both at once.]) -fi - -if test "$enable_utf16" = "yes"; then - AC_DEFINE([SUPPORT_UTF16], [], [ - Define to enable support for the UTF-16 Unicode encoding. This will - work even in an EBCDIC environment, but it is incompatible with - the EBCDIC macro. That is, PCRE can support *either* EBCDIC code - *or* ASCII/UTF-16, but not both at once.]) +if test "$enable_utf" = "yes"; then + AC_DEFINE([SUPPORT_UTF], [], [ + Define to enable support for the UTF-8/16 Unicode encoding. This + will work even in an EBCDIC environment, but it is incompatible + with the EBCDIC macro. That is, PCRE can support *either* EBCDIC + code *or* ASCII/UTF-8/16, but not both at once.]) fi if test "$enable_unicode_properties" = "yes"; then @@ -742,9 +706,9 @@ if test "$enable_ebcdic" = "yes"; then character codes, define this macro as 1. On systems that can use "configure", this can be done via --enable-ebcdic. PCRE will then assume that all input strings are in EBCDIC. If you do not define - this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode. - It is not possible to build a version of PCRE that supports both - EBCDIC and UTF-8.]) + this macro, PCRE will assume input strings are ASCII or UTF-8/16 + Unicode. It is not possible to build a version of PCRE that + supports both EBCDIC and UTF-8/16.]) fi # Platform specific issues @@ -869,8 +833,7 @@ $PACKAGE-$VERSION configuration summary: Build 16 bit pcre library ....... : ${enable_pcre16} Build C++ library ............... : ${enable_cpp} Enable JIT compiling support .... 
: ${enable_jit} - Enable UTF-8 support ............ : ${enable_utf8} - Enable UTF-16 support ........... : ${enable_utf16} + Enable UTF-8/16 support ......... : ${enable_utf} Unicode properties .............. : ${enable_unicode_properties} Newline char/sequence ........... : ${enable_newline} \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf} diff --git a/pcre16_ord2utf16.c b/pcre16_ord2utf16.c index 67c4c5c..99bed29 100644 --- a/pcre16_ord2utf16.c +++ b/pcre16_ord2utf16.c @@ -67,7 +67,7 @@ Returns: number of characters placed in the buffer int PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer) { -#ifdef SUPPORT_UTF16 +#ifdef SUPPORT_UTF /* Checking invalid cvalue character, encoded as invalid UTF-16 character. Should never happen in practice. */ @@ -85,11 +85,11 @@ cvalue -= 0x10000; *buffer = 0xdc00 | (cvalue & 0x3ff); return 2; -#else +#else /* SUPPORT_UTF */ (void)(cvalue); /* Keep compiler happy; this function won't ever be */ (void)(buffer); /* called when SUPPORT_UTF8 is not defined. */ return 0; -#endif +#endif /* SUPPORT_UTF */ } /* End of pcre16_ord2utf16.c */ diff --git a/pcre16_utf16_utils.c b/pcre16_utf16_utils.c index 8f970bb..b927458 100644 --- a/pcre16_utf16_utils.c +++ b/pcre16_utf16_utils.c @@ -77,7 +77,7 @@ Returns: the number of characters placed into the output buffer, int pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms) { -#ifdef SUPPORT_UTF16 +#ifdef SUPPORT_UTF /* This function converts any UTF-16 string to host byte order and optionally removes any Byte Order Marks (BOMS). Returns with the remainig length. */ BOOL same_bo = TRUE; @@ -108,11 +108,11 @@ while (iptr < end) *optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. 
*/ } -#else +#else /* SUPPORT_UTF */ (void)(output); /* Keep picky compilers happy */ (void)(input); (void)(keep_boms); -#endif +#endif /* SUPPORT_UTF */ return length; } diff --git a/pcre16_valid_utf16.c b/pcre16_valid_utf16.c index cc3e50e..b8ec699 100644 --- a/pcre16_valid_utf16.c +++ b/pcre16_valid_utf16.c @@ -83,7 +83,7 @@ Returns: = 0 if the string is a valid UTF-16 string int PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) { -#ifdef SUPPORT_UTF16 +#ifdef SUPPORT_UTF register PCRE_PUCHAR p; register pcre_uchar c; @@ -135,10 +135,10 @@ for (p = string; length-- > 0; p++) } } -#else /* SUPPORT_UTF16 */ +#else /* SUPPORT_UTF */ (void)(string); /* Keep picky compilers happy */ (void)(length); -#endif +#endif /* SUPPORT_UTF */ return PCRE_UTF16_ERR0; /* This indicates success */ } diff --git a/pcre_compile.c b/pcre_compile.c index 223e475..24a7b1c 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4607,12 +4607,7 @@ for (;; ptr++) it's a length rather than a small character. */ #ifdef SUPPORT_UTF -#ifdef COMPILE_PCRE8 - if (utf && (code[-1] & 0x80) != 0) -#endif /* COMPILE_PCRE8 */ -#ifdef COMPILE_PCRE16 - if (utf && (code[-1] & 0xfc00) == 0xdc00) -#endif /* COMPILE_PCRE8 */ + if (utf && NOT_FIRSTCHAR(code[-1])) { pcre_uchar *lastchar = code - 1; BACKCHAR(lastchar); @@ -4625,7 +4620,6 @@ for (;; ptr++) /* Handle the case of a single charater - either with no UTF support, or with UTF disabled, or for a single character UTF character. 
*/ - { c = code[-1]; if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt; @@ -7438,8 +7432,14 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && int newnl = 0; int newbsr = 0; +#ifdef COMPILE_PCRE8 if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0) { skipatstart += 7; options |= PCRE_UTF8; continue; } +#endif +#ifdef COMPILE_PCRE16 + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0) + { skipatstart += 8; options |= PCRE_UTF16; continue; } +#endif else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) { skipatstart += 6; options |= PCRE_UCP; continue; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) diff --git a/pcre_config.c b/pcre_config.c index a7792f7..92c9973 100644 --- a/pcre_config.c +++ b/pcre_config.c @@ -73,7 +73,7 @@ pcre16_config(int what, void *where) switch (what) { case PCRE_CONFIG_UTF8: -#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8 +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 *((int *)where) = 1; #else *((int *)where) = 0; @@ -81,7 +81,7 @@ switch (what) break; case PCRE_CONFIG_UTF16: -#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16 +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 *((int *)where) = 1; #else *((int *)where) = 0; diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 7cceaae..a5bc745 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -2683,7 +2683,7 @@ for (;;) const pcre_uchar *p = start_subject + local_offsets[rc]; const pcre_uchar *pp = start_subject + local_offsets[rc+1]; int charcount = local_offsets[rc+1] - local_offsets[rc]; - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; if (charcount > 0) { ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); @@ -2780,7 +2780,7 @@ for (;;) const pcre_uchar *p = ptr; const pcre_uchar *pp = local_ptr; charcount = pp - p; - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; + while (p < pp) if 
(NOT_FIRSTCHAR(*p++)) charcount--; ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); } } @@ -2862,7 +2862,7 @@ for (;;) { const pcre_uchar *p = start_subject + local_offsets[0]; const pcre_uchar *pp = start_subject + local_offsets[1]; - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); if (repeat_state_offset >= 0) { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } @@ -3144,7 +3144,7 @@ else /* Check a UTF-8 string if required. Unfortunately there's no way of passing back the character offset. */ -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) { int erroroffset; @@ -3159,17 +3159,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)? PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; } -#ifdef COMPILE_PCRE8 - if (start_offset > 0 && start_offset < length && - (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80) - return PCRE_ERROR_BADUTF8_OFFSET; -#else -#ifdef COMPILE_PCRE16 if (start_offset > 0 && start_offset < length && - (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00) + NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) return PCRE_ERROR_BADUTF8_OFFSET; -#endif /* COMPILE_PCRE16 */ -#endif /* COMPILE_PCRE8 */ } #endif diff --git a/pcre_exec.c b/pcre_exec.c index 676f4b8..c5932f7 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6038,17 +6038,9 @@ if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) } /* Check that a start_offset points to the start of a UTF character. 
*/ -#ifdef COMPILE_PCRE8 if (start_offset > 0 && start_offset < length && - (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80) + NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) return PCRE_ERROR_BADUTF8_OFFSET; -#else -#ifdef COMPILE_PCRE16 - if (start_offset > 0 && start_offset < length && - (((PCRE_PUCHAR)subject)[start_offset] & 0xfc00) == 0xdc00) - return PCRE_ERROR_BADUTF8_OFFSET; -#endif /* COMPILE_PCRE16 */ -#endif /* COMPILE_PCRE8 */ } #endif diff --git a/pcre_internal.h b/pcre_internal.h index 624e07c..e748809 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -57,33 +57,32 @@ the PRIV macro. */ #define COMPILE_PCRE8 #endif -/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure" -script prevents both being selected, but not everybody uses "configure". */ +/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The +"configure" script ensures this, but not everybody uses "configure". */ -#if defined EBCDIC && (defined SUPPORT_UTF8 || defined SUPPORT_UTF16) -#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported. +#if defined SUPPORT_UCP && !(defined SUPPORT_UTF) +#define SUPPORT_UTF 1 #endif -/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The -"configure" script ensures this, but not everybody uses "configure". */ +/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility +reasons with existing code. */ -#if defined SUPPORT_UCP && !defined SUPPORT_UTF8 -#define SUPPORT_UTF8 1 +#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF) +#define SUPPORT_UTF 1 #endif -/* If SUPPORT_UCP is defined, SUPPORT_UTF16 must also be defined. The -"configure" script ensures this, but not everybody uses "configure". */ +/* Fixme: SUPPORT_UTF8 should be eventually disappear from the code. +Until then we define it if SUPPORT_UTF is defined. 
*/ -#if defined SUPPORT_UCP && defined COMPILE_PCRE16 && !defined SUPPORT_UTF16 -#define SUPPORT_UTF16 1 +#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8) +#define SUPPORT_UTF8 1 #endif -/* This macro is defined if either UTF-8 or UTF-16 support or both are -enabled. */ +/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure" +script prevents both being selected, but not everybody uses "configure". */ -#if defined SUPPORT_UTF8 || defined SUPPORT_UTF16 -/* Unicode Transformation Format is enabled. */ -#define SUPPORT_UTF 1 +#if defined EBCDIC && defined SUPPORT_UTF +#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported. #endif /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef @@ -524,16 +523,18 @@ capturing parenthesis numbers in back references. */ #define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE -/* When UTF-8 encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is -not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should -never be called in byte mode. To make sure they can never even appear when -UTF-8 support is omitted, we don't even define them. */ +/* When UTF encoding is being used, a character is no longer just a single +character. The macros for character handling generate simple sequences when +used in character-mode, and more complicated ones for UTF characters. +GETCHARLENTEST and other macros are not used when UTF is not supported, +so they are not defined. To make sure they can never even appear when +UTF support is omitted, we don't even define them. 
*/ + +#ifndef SUPPORT_UTF /* #define HAS_EXTRALEN(c) */ /* #define GET_EXTRALEN(c) */ -#ifndef SUPPORT_UTF +/* #define NOT_FIRSTCHAR(c) */ #define GETCHAR(c, eptr) c = *eptr; #define GETCHARTEST(c, eptr) c = *eptr; #define GETCHARINC(c, eptr) c = *eptr++; @@ -562,6 +563,11 @@ Otherwise it has an undefined behaviour. */ #define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f]) +/* Returns TRUE, if the given character is not the first character +of a UTF sequence. */ + +#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80) + /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer. */ @@ -724,6 +730,11 @@ Otherwise it has an undefined behaviour. */ #define GET_EXTRALEN(c) 1 +/* Returns TRUE, if the given character is not the first character +of a UTF sequence. */ + +#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00) + /* Base macro to pick up the low surrogate of a UTF-16 character, not advancing the pointer. */ diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index 3b85b85..8c6b206 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -1240,7 +1240,7 @@ OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_T OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); /* Copy the integer value to the output buffer */ #ifdef COMPILE_PCRE16 -OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1); +OP2(SLJIT_ASHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1); #endif OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); @@ -1353,7 +1353,7 @@ if (!ispowerof2(bit)) #ifdef COMPILE_PCRE8 -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf && c > 127) { n = GET_EXTRALEN(*cc); @@ -1364,13 +1364,13 @@ if (common->utf && c > 127) } return (n << 8) | bit; } -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ return (0 
<< 8) | bit; #else /* COMPILE_PCRE8 */ #ifdef COMPILE_PCRE16 -#ifdef SUPPORT_UTF16 +#ifdef SUPPORT_UTF if (common->utf && c > 65535) { if (bit >= (1 << 10)) @@ -1378,7 +1378,7 @@ if (common->utf && c > 65535) else return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); } -#endif /* SUPPORT_UTF16 */ +#endif /* SUPPORT_UTF */ return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); #endif /* COMPILE_PCRE16 */ diff --git a/pcre_jit_test.c b/pcre_jit_test.c index 8aee260..d82af25 100644 --- a/pcre_jit_test.c +++ b/pcre_jit_test.c @@ -616,7 +616,7 @@ static struct regression_test_case regression_test_cases[] = { { 0, 0, NULL, NULL } }; -pcre_jit_stack* callback(void *arg) +static pcre_jit_stack* callback(void *arg) { return (pcre_jit_stack *)arg; } |