diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-06 11:33:41 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-06 11:33:41 +0000 |
commit | b4a0233a732c67c98886725229df86fc150b0e82 (patch) | |
tree | e0a12eaa6f6f8aedd3f5b76969cb0a12a78f1341 | |
parent | a9839b968cee5828bf35dbcb05a31859a49ab7a2 (diff) | |
download | pcre-b4a0233a732c67c98886725229df86fc150b0e82.tar.gz |
Updating pcre_jit_test. Most of the JIT tests are working now in 16 bit mode.
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@786 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | pcre.h.in | 2 | ||||
-rw-r--r-- | pcre16_config.c | 45 | ||||
-rw-r--r-- | pcre_compile.c | 27 | ||||
-rw-r--r-- | pcre_config.c | 15 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 4 | ||||
-rw-r--r-- | pcre_exec.c | 4 | ||||
-rw-r--r-- | pcre_internal.h | 14 | ||||
-rw-r--r-- | pcre_jit_compile.c | 109 | ||||
-rw-r--r-- | pcre_jit_test.c | 417 |
10 files changed, 471 insertions, 167 deletions
diff --git a/Makefile.am b/Makefile.am index 817b01a..b64ccd5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -211,6 +211,7 @@ lib_LTLIBRARIES += libpcre16.la libpcre16_la_SOURCES = \ pcre16_chartables.c \ pcre16_compile.c \ + pcre16_config.c \ pcre16_exec.c \ pcre16_fullinfo.c \ pcre16_info.c \ @@ -234,6 +234,7 @@ compatible. */ #define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 #define PCRE_CONFIG_BSR 8 #define PCRE_CONFIG_JIT 9 +#define PCRE_CONFIG_UTF16 10 /* Request types for pcre_study(). Do not re-arrange, in order to remain compatible. */ @@ -353,6 +354,7 @@ PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, PCRE_EXP_DECL pcre *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, int *, const unsigned char *); PCRE_EXP_DECL int pcre_config(int, void *); +PCRE_EXP_DECL int pcre16_config(int, void *); PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, int *, int, const char *, char *, int); PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *, diff --git a/pcre16_config.c b/pcre16_config.c new file mode 100644 index 0000000..826b100 --- /dev/null +++ b/pcre16_config.c @@ -0,0 +1,45 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2011 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* Generate code with 16 bit character support. */ +#define COMPILE_PCRE16 + +#include "pcre_config.c" + +/* End of pcre16_config.c */ diff --git a/pcre_compile.c b/pcre_compile.c index da22f59..bdfac5b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3738,8 +3738,8 @@ for (;; ptr++) { const pcre_uchar *oldptr; -#ifdef SUPPORT_UTF8 - if (utf && c > 127) +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(c)) { /* Braces are required because the */ GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ } @@ -4317,11 +4317,10 @@ for (;; ptr++) #ifdef SUPPORT_UTF if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) -#endif -#ifndef COMPILE_PCRE8 +#elif !(defined COMPILE_PCRE8) if (c > 255) #endif -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) { xclass = TRUE; *class_uchardata++ = XCL_SINGLE; @@ -4345,8 +4344,7 @@ for (;; ptr++) } else -#endif /* SUPPORT_UTF8 */ - +#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ /* Handle a single-byte character */ { classbits[c/8] |= (1 << (c&7)); @@ -4358,6 +4356,7 @@ for (;; ptr++) class_charcount++; class_lastchar = c; } + } /* Loop until ']' reached. This "while" is the end of the "do" far above. @@ -5849,7 +5848,7 @@ for (;; ptr++) for (i = 0; i < cd->names_found; i++) { - int crc = memcmp(name, slot+2, namelen); + int crc = memcmp(name, slot+2, IN_UCHARS(namelen)); if (crc == 0) { if (slot[2+namelen] == 0) @@ -7440,7 +7439,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && int newnl = 0; int newbsr = 0; - if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0) + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0) { skipatstart += 7; options |= PCRE_UTF8; continue; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) { skipatstart += 6; options |= PCRE_UCP; continue; } @@ -7805,8 +7804,7 @@ if ((re->options & PCRE_ANCHORED) == 0) if (cd->fcc[re->first_char] != re->first_char) re->flags |= PCRE_FCH_CASELESS; } - else if ((options & PCRE_UCP) != 0 - && UCD_OTHERCASE(re->first_char) != re->first_char) + else if (UCD_OTHERCASE(re->first_char) != re->first_char) re->flags |= PCRE_FCH_CASELESS; } else @@ -7843,13 +7841,12 @@ if (reqchar >= 0 && /* We ignore non-ASCII first chars in 8 bit mode. */ if (utf) { - if (re->first_char < 128) + if (re->req_char < 128) { - if (cd->fcc[re->first_char] != re->first_char) + if (cd->fcc[re->req_char] != re->req_char) re->flags |= PCRE_RCH_CASELESS; } - else if ((options & PCRE_UCP) != 0 - && UCD_OTHERCASE(re->first_char) != re->first_char) + else if (UCD_OTHERCASE(re->req_char) != re->req_char) re->flags |= PCRE_RCH_CASELESS; } else diff --git a/pcre_config.c b/pcre_config.c index bf42c02..a7792f7 100644 --- a/pcre_config.c +++ b/pcre_config.c @@ -62,13 +62,26 @@ Arguments: Returns: 0 if data returned, negative on error */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_config(int what, void *where) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_config(int what, void *where) +#endif { switch (what) { case PCRE_CONFIG_UTF8: -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8 + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; + + case PCRE_CONFIG_UTF16: +#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16 *((int *)where) = 1; #else *((int *)where) = 0; diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 1bc96c1..7cceaae 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -3202,7 +3202,7 @@ if (!anchored) { first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (first_char > 127 && utf && md->use_ucp) + if (utf && first_char > 127) first_char2 = UCD_OTHERCASE(first_char); #endif } @@ -3226,7 +3226,7 @@ if ((re->flags & PCRE_REQCHSET) != 0) { req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (req_char > 127 && utf && md->use_ucp) + if (utf && req_char > 127) req_char2 = UCD_OTHERCASE(req_char); #endif } diff --git a/pcre_exec.c b/pcre_exec.c index bb1b60a..5f0a156 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6267,7 +6267,7 @@ if (!anchored) { first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (first_char > 127 && utf && md->use_ucp) + if (utf && first_char > 127) first_char2 = UCD_OTHERCASE(first_char); #endif } @@ -6289,7 +6289,7 @@ if ((re->flags & PCRE_REQCHSET) != 0) { req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (req_char > 127 && utf && md->use_ucp) + if (utf && req_char > 127) req_char2 = UCD_OTHERCASE(req_char); #endif } diff --git a/pcre_internal.h b/pcre_internal.h index 4046e41..b93101f 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1166,7 +1166,12 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */ #define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" -#define STRING_UTF8_RIGHTPAR "UTF8)" +#ifdef COMPILE_PCRE8 +#define STRING_UTF_RIGHTPAR "UTF8)" +#endif +#ifdef COMPILE_PCRE16 +#define STRING_UTF_RIGHTPAR "UTF16)" +#endif #define STRING_UCP_RIGHTPAR "UCP)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" @@ -1421,7 +1426,12 @@ only. */ #define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS -#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#ifdef COMPILE_PCRE8 +#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#endif +#ifdef COMPILE_PCRE16 +#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS +#endif #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index df158be..3b85b85 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -297,7 +297,7 @@ typedef struct compiler_common { jump_list *casefulcmp; jump_list *caselesscmp; BOOL jscript_compat; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF BOOL utf; #ifdef SUPPORT_UCP BOOL use_ucp; @@ -306,7 +306,7 @@ typedef struct compiler_common { #ifdef COMPILE_PCRE8 jump_list *utfreadtype8; #endif -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP jump_list *getucd; #endif @@ -500,7 +500,7 @@ switch(*cc) return cc + 1; case OP_ANYBYTE: -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) return NULL; #endif return cc + 1; @@ -576,6 +576,8 @@ switch(*cc) case OP_NOTPROP: case OP_PROP: + return cc + 1 + 2; + case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: @@ -1267,7 +1269,7 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* /* Detects if the character has an othercase. */ unsigned int c; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { GETCHAR(c, cc); @@ -1279,6 +1281,9 @@ if (common->utf) return FALSE; #endif } +#ifndef COMPILE_PCRE8 + return common->fcc[c] != c; +#endif } else #endif @@ -1769,6 +1774,9 @@ if (newlinecheck) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); +#ifdef COMPILE_PCRE16 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +#endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); nl = JUMP(SLJIT_JUMP); } @@ -1776,7 +1784,7 @@ if (newlinecheck) mainloop = LABEL(); /* Increasing the STR_PTR here requires one less jump in the most common case. */ -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) readuchar = TRUE; #endif if (newlinecheck) readuchar = TRUE; @@ -1843,7 +1851,7 @@ if (caseless) { oc = TABLE_GET(first_char, common->fcc, first_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (first_char > 127 && common->utf && common->use_ucp) + if (first_char > 127 && common->utf) oc = UCD_OTHERCASE(first_char); #endif } @@ -2077,7 +2085,7 @@ if (caseless) { oc = TABLE_GET(req_char, common->fcc, req_char); #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (req_char > 127 && common->utf && common->use_ucp) + if (req_char > 127 && common->utf) oc = UCD_OTHERCASE(req_char); #endif } @@ -2265,7 +2273,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); @@ -2289,7 +2297,7 @@ COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); @@ -2323,7 +2331,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); @@ -2415,8 +2423,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); #undef CHAR1 #undef CHAR2 -#ifdef SUPPORT_UTF8 -#ifdef SUPPORT_UCP +#if defined SUPPORT_UTF && defined SUPPORT_UCP static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) { @@ -2436,8 +2443,7 @@ while (src1 < end1) return src2; } -#endif -#endif +#endif /* SUPPORT_UTF && SUPPORT_UCP */ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, compare_context* context, jump_list **fallbacks) @@ -2445,7 +2451,7 @@ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, DEFINE_COMPILER; unsigned int othercasebit = 0; pcre_uchar *othercasechar = NULL; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF int utflength; #endif @@ -2588,7 +2594,7 @@ do #endif cc++; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF utflength--; } while (utflength > 0); @@ -2646,7 +2652,7 @@ if ((*cc++ & XCL_MAP) != 0) OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); #ifndef COMPILE_PCRE8 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#elif defined SUPPORT_UTF8 +#elif defined SUPPORT_UTF if (common->utf) jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif @@ -2660,7 +2666,7 @@ if ((*cc++ & XCL_MAP) != 0) #ifndef COMPILE_PCRE8 JUMPHERE(jump); -#elif defined SUPPORT_UTF8 +#elif defined SUPPORT_UTF if (common->utf) JUMPHERE(jump); #endif @@ -2795,7 +2801,7 @@ while (*cc != XCL_END) if (*cc == XCL_SINGLE) { cc ++; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { GETCHARINC(c, cc); @@ -2826,7 +2832,7 @@ while (*cc != XCL_END) else if (*cc == XCL_RANGE) { cc ++; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { GETCHARINC(c, cc); @@ -2835,7 +2841,7 @@ while (*cc != XCL_END) #endif c = *cc++; SET_CHAR_OFFSET(c); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { GETCHARINC(c, cc); @@ -2963,7 +2969,7 @@ int length; unsigned int c, oc, bit; compare_context context; struct sljit_jump *jump[4]; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF struct sljit_label *label; #ifdef SUPPORT_UCP pcre_uchar propdata[5]; @@ -3063,7 +3069,7 @@ switch(type) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF #ifdef SUPPORT_UCP case OP_NOTPROP: case OP_PROP: @@ -3279,7 +3285,7 @@ switch(type) } check_input_end(common, fallbacks); read_char(common); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { GETCHAR(c, cc); @@ -3296,16 +3302,14 @@ switch(type) case OP_NOT: case OP_NOTI: + check_input_end(common, fallbacks); + length = 1; #ifdef SUPPORT_UTF if (common->utf) { - length = 1; - if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); - - check_input_end(common, fallbacks); - GETCHAR(c, cc); - - if (c <= 127) +#ifdef COMPILE_PCRE8 + c = *cc; + if (c < 128) { OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) @@ -3317,24 +3321,24 @@ switch(type) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); } /* Skip the variable-length character. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#ifdef COMPILE_PCRE8 - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); -#endif + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(jump[0]); - return cc + length; + return cc + 1; } else +#endif /* COMPILE_PCRE8 */ + { + GETCHARLEN(c, cc, length); read_char(common); + } } else -#endif +#endif /* SUPPORT_UTF */ { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + read_char(common); c = *cc; } @@ -3363,10 +3367,11 @@ switch(type) read_char(common); #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 jump[0] = NULL; -#ifdef SUPPORT_UTF8 - /* This check can only be skipped in pure 8 bit mode. */ +#ifdef COMPILE_PCRE8 + /* This check only affects 8 bit mode. In other modes, we + always need to compare the value with 255. */ if (common->utf) -#endif +#endif /* COMPILE_PCRE8 */ { jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); if (type == OP_CLASS) @@ -3375,7 +3380,7 @@ switch(type) jump[0] = NULL; } } -#endif +#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc); @@ -3385,7 +3390,7 @@ switch(type) #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (jump[0] != NULL) JUMPHERE(jump[0]); -#endif +#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ return cc + 32 / sizeof(pcre_uchar); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 @@ -3399,7 +3404,7 @@ switch(type) SLJIT_ASSERT(length > 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF if (common->utf) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); @@ -3411,7 +3416,7 @@ switch(type) return cc + LINK_SIZE; } #endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); return cc + LINK_SIZE; } @@ -3548,8 +3553,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); if (withchecks && !common->jscript_compat) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); -#ifdef SUPPORT_UTF8 -#ifdef SUPPORT_UCP +#if defined SUPPORT_UTF && defined SUPPORT_UCP if (common->utf && *cc == OP_REFI) { SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3); @@ -3567,8 +3571,7 @@ if (common->utf && *cc == OP_REFI) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); } else -#endif -#endif +#endif /* SUPPORT_UTF && SUPPORT_UCP */ { OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); if (withchecks) @@ -6422,7 +6425,7 @@ common->vspace = NULL; common->casefulcmp = NULL; common->caselesscmp = NULL; common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF /* PCRE_UTF16 has the same value as PCRE_UTF8. */ common->utf = (re->options & PCRE_UTF8) != 0; #ifdef SUPPORT_UCP @@ -6432,7 +6435,7 @@ common->utfreadchar = NULL; #ifdef COMPILE_PCRE8 common->utfreadtype8 = NULL; #endif -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP common->getucd = NULL; #endif diff --git a/pcre_jit_test.c b/pcre_jit_test.c index 8a03272..e4d2432 100644 --- a/pcre_jit_test.c +++ b/pcre_jit_test.c @@ -51,18 +51,35 @@ POSSIBILITY OF SUCH DAMAGE. #define PCRE_BUG 0x80000000 /* - Hungarian utf8 characters - \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E') - \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A') - \xe6\x92\xad = 0x64ad = 25773 (a valid kanji) - \xc2\x85 = 0x85 (NExt Line = NEL) - \xc2\xa1 = 0xa1 (Inverted Exclamation Mark) - \xe2\x80\xa8 = 0x2028 (Line Separator) - \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length) - \xcc\x8d = 781 (Something with Mark property) + Letter characters: + \xe6\x92\xad = 0x64ad = 25773 (kanji) + Non-letter characters: + \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark) + \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888 + Newlines: + \xc2\x85 = 0x85 = 133 (NExt Line = NEL) + \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator) + Othercase pairs: + \xc3\xa9 = 0xe9 = 233 (e') + \xc3\x89 = 0xc9 = 201 (E') + \xc3\xa1 = 0xe1 = 225 (a') + \xc3\x81 = 0xc1 = 193 (A') + \xc8\xba = 0x23a = 570 + \xe2\xb1\xa5 = 0x2c65 = 11365 + \xe1\xbd\xb8 = 0x1f78 = 8056 + \xe1\xbf\xb8 = 0x1ff8 = 8184 + \xf0\x90\x90\x80 = 0x10400 = 66560 + \xf0\x90\x90\xa8 = 0x10428 = 66600 + Mark property: + \xcc\x8d = 0x30d = 781 + Special: + \xdf\xbf = 0x7ff = 2047 (highest 2 byte character) + \xe0\xa0\x80 = 0x800 = 2048 (lowest 2 byte character) + \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character) + \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character) + \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character) */ -static void setstack(pcre_extra *extra); static int regression_tests(void); int main(void) @@ -76,21 +93,12 @@ int main(void) return regression_tests(); } -static pcre_jit_stack* callback(void *arg) -{ - return (pcre_jit_stack *)arg; -} - -static void setstack(pcre_extra *extra) -{ - static pcre_jit_stack *stack; - if (stack) pcre_jit_stack_free(stack); - stack = pcre_jit_stack_alloc(1, 1024 * 1024); - pcre_assign_jit_stack(extra, callback, stack); -} - /* --------------------------------------------------------------------------------------- */ +#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16) +#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined +#endif + #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP) #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF) @@ -139,6 +147,10 @@ static struct regression_test_case regression_test_cases[] = { { CMA, 0, "\\Ca", "CDA" }, { MA, 0, "\\Cx", "cda" }, { CMA, 0, "\\Cx", "CDA" }, + { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, + { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, /* Assertions. */ { MUA, 0, "\\b[^A]", "A_B#" }, @@ -151,6 +163,7 @@ static struct regression_test_case regression_test_cases[] = { { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" }, { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, { MUA, 0, "\\b.", "\xcd\xbe" }, + { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" }, { MA, 0, "\\R^", "\n" }, { MA, 1, "^", "\n" }, { 0, 0, "^ab", "ab" }, @@ -267,6 +280,7 @@ static struct regression_test_case regression_test_cases[] = { { MUA, 0, "\\b\\w+\\B", "x,a_cd" }, { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" }, + { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, /* Basic character sets. */ { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, @@ -449,6 +463,7 @@ static struct regression_test_case regression_test_cases[] = { { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" }, { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" }, { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." }, + { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" }, /* Assertions. */ { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, @@ -601,111 +616,328 @@ static struct regression_test_case regression_test_cases[] = { { 0, 0, NULL, NULL } }; +pcre_jit_stack* callback(void *arg) +{ + return (pcre_jit_stack *)arg; +} + +static void setstack(pcre_extra *extra, int realloc) +{ + static pcre_jit_stack *stack; + + if (realloc) { + if (stack) + pcre_jit_stack_free(stack); + stack = pcre_jit_stack_alloc(1, 1024 * 1024); + } + /* Extra can be NULL. */ + pcre_assign_jit_stack(extra, callback, stack); +} + +#ifdef SUPPORT_PCRE16 + +static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length) +{ + unsigned char *ptr = (unsigned char*)input; + PCRE_SCHAR16 *optr = output; + unsigned int c; + + if (max_length == 0) + return 0; + + while (*ptr && max_length > 1) { + c = 0; + if (offsetmap) + *offsetmap++ = (int)(ptr - (unsigned char*)input); + + if (!(*ptr & 0x80)) + c = *ptr++; + else if (!(*ptr & 0x20)) { + c = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f); + ptr += 2; + } else if (!(*ptr & 0x10)) { + c = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f); + ptr += 3; + } else if (!(*ptr & 0x08)) { + c = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f); + ptr += 4; + } + + if (c < 65536) { + *optr++ = c; + max_length--; + } else if (max_length <= 2) { + *optr = '\0'; + return optr - output; + } else { + c -= 0x10000; + *optr++ = 0xd800 | ((c >> 10) & 0x3ff); + *optr++ = 0xdc00 | (c & 0x3ff); + max_length -= 2; + if (offsetmap) + offsetmap++; + } + } + if (offsetmap) + *offsetmap = (int)(ptr - (unsigned char*)input); + *optr = '\0'; + return optr - output; +} + +static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length) +{ + PCRE_SCHAR16 *optr = output; + + if (max_length == 0) + return 0; + + while (*input && max_length > 1) { + *optr++ = *input++; + max_length--; + } + *optr = '\0'; + return optr - output; +} + +#define REGTEST_MAX_LENGTH 4096 +static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH]; +static int regtest_offsetmap[REGTEST_MAX_LENGTH]; + +#endif /* SUPPORT_PCRE16 */ + static int regression_tests(void) { - pcre *re; struct regression_test_case *current = regression_test_cases; const char *error; - pcre_extra *extra; - int utf8 = 0, ucp = 0; - int ovector1[32]; - int ovector2[32]; - int return_value1, return_value2; - int i, err_offs; - int total = 0, succesful = 0; + int i, err_offs, is_succesful; + int total = 0; + int succesful = 0; int counter = 0; - int disabled_flags = PCRE_BUG; +#ifdef SUPPORT_PCRE8 + pcre *re8; + pcre_extra *extra8; + int ovector8_1[32]; + int ovector8_2[32]; + int return_value8_1, return_value8_2; + int utf8 = 0, ucp8 = 0; + int disabled_flags8 = PCRE_BUG; +#endif +#ifdef SUPPORT_PCRE16 + pcre *re16; + pcre_extra *extra16; + int ovector16_1[32]; + int ovector16_2[32]; + int return_value16_1, return_value16_2; + int utf16 = 0, ucp16 = 0; + int disabled_flags16 = PCRE_BUG; + int length16; +#endif /* This test compares the behaviour of interpreter and JIT. Although disabling - utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is + utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is still considered successful from pcre_jit_test point of view. */ + printf("Running JIT regression\n"); + +#ifdef SUPPORT_PCRE8 pcre_config(PCRE_CONFIG_UTF8, &utf8); - pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp); + pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8); if (!utf8) - disabled_flags |= PCRE_UTF8; - if (!ucp) - disabled_flags |= PCRE_UCP; + disabled_flags8 |= PCRE_UTF8; + if (!ucp8) + disabled_flags8 |= PCRE_UCP; + printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled"); +#endif +#ifdef SUPPORT_PCRE16 + pcre16_config(PCRE_CONFIG_UTF16, &utf16); + pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16); + if (!utf16) + disabled_flags16 |= PCRE_UTF8; + if (!ucp16) + disabled_flags16 |= PCRE_UCP; + printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled"); +#endif - printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled"); while (current->pattern) { /* printf("\nPattern: %s :\n", current->pattern); */ total++; error = NULL; - re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL); - - if (!re) { - if (utf8 && ucp) - printf("\nCannot compile pattern: %s\n", current->pattern); - else { - /* Some patterns cannot be compiled when either of utf8 - or ucp is disabled. We just skip them. */ - printf("."); - succesful++; +#ifdef SUPPORT_PCRE8 + re8 = pcre_compile(current->pattern, + current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8), + &error, &err_offs, NULL); + + if (re8) { + error = NULL; + extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error); + if (!extra8) { + printf("\n8 bit: Cannot study pattern: %s\n", current->pattern); + pcre_free(re8); + re8 = NULL; } - current++; - continue; - } - - error = NULL; - extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error); - if (!extra) { - printf("\nCannot study pattern: %s\n", current->pattern); - current++; - continue; - } - - if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { - printf("\nJIT compiler does not support: %s\n", current->pattern); - current++; - continue; - } + if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { + printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern); + pcre_free_study(extra8); + pcre_free(re8); + re8 = NULL; + } + } else if (utf8 && ucp8) + printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern); +#endif +#ifdef SUPPORT_PCRE16 + convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH); + re16 = pcre16_compile(regtest_buf, + current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16), + &error, &err_offs, NULL); + if (re16) { + error = NULL; + extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error); + if (!extra16) { + printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); + pcre_free(re16); + re16 = NULL; + } + if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { + printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern); + pcre_free_study(extra16); + pcre_free(re16); + re16 = NULL; + } + } else if (utf16 && ucp16) + printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern); +#endif counter++; if ((counter & 0x3) != 0) - setstack(extra); - - for (i = 0; i < 32; ++i) - ovector1[i] = -2; - return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32); + setstack(NULL, 1); + +#ifdef SUPPORT_PCRE8 + if (re8) { + setstack(extra8, 0); + for (i = 0; i < 32; ++i) + ovector8_1[i] = -2; + return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32); + + for (i = 0; i < 32; ++i) + ovector8_2[i] = -2; + return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32); + } +#endif - for (i = 0; i < 32; ++i) - ovector2[i] = -2; - return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32); +#ifdef SUPPORT_PCRE16 + if (re16) { + setstack(extra16, 0); + if (current->flags & PCRE_UTF8) + length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH); + else + length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH); + + for (i = 0; i < 32; ++i) + ovector16_1[i] = -2; + return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32); + + for (i = 0; i < 32; ++i) + ovector16_2[i] = -2; + return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset, + current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32); + } +#endif /* If PCRE_BUG is set, just run the test, but do not compare the results. Segfaults can still be captured. */ - if (!(current->flags & PCRE_BUG)) { - if (return_value1 != return_value2) { - printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input); - current++; - continue; - } - if (return_value1 >= 0) { - return_value1 *= 2; - err_offs = 0; - for (i = 0; i < return_value1; ++i) - if (ovector1[i] != ovector2[i]) { - printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input); - err_offs = 1; + is_succesful = 1; + if (!(current->flags & PCRE_BUG)) { +#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 + if ((current->flags & PCRE_UTF8) && utf8 && utf16) { + /* All results must be the same. */ + if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) { + printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n", + return_value8_1, return_value8_2, return_value16_1, return_value16_2, + total, current->pattern, current->input); + is_succesful = 0; + } else if (return_value8_1 >= 0) { + return_value8_1 *= 2; + /* Transform back the results. */ + for (i = 0; i < return_value8_1; ++i) { + if (ovector16_1[i] >= 0) + ovector16_1[i] = regtest_offsetmap[ovector16_1[i]]; + if (ovector16_2[i] >= 0) + ovector16_2[i] = regtest_offsetmap[ovector16_2[i]]; } - if (err_offs) { - current++; - continue; + + for (i = 0; i < return_value8_1; ++i) + if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { + printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n", + i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i], + total, current->pattern, current->input); + is_succesful = 0; + } + } + } else { +#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ + /* Only the 8 bit and 16 bit results must be equal. */ +#ifdef SUPPORT_PCRE8 + if (return_value8_1 != return_value8_2) { + printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", + return_value8_1, return_value8_2, total, current->pattern, current->input); + is_succesful = 0; + } else if (return_value8_1 >= 0) { + return_value8_1 *= 2; + for (i = 0; i < return_value8_1; ++i) + if (ovector8_1[i] != ovector8_2[i]) { + printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n", + i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input); + is_succesful = 0; + } } +#endif + +#ifdef SUPPORT_PCRE16 + if (return_value16_1 != return_value16_2) { + printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", + return_value16_1, return_value16_2, total, current->pattern, current->input); + is_succesful = 0; + } else if (return_value16_1 >= 0) { + return_value16_1 *= 2; + for (i = 0; i < return_value16_1; ++i) + if (ovector16_1[i] != ovector16_2[i]) { + printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n", + i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input); + is_succesful = 0; + } + } +#endif + +#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 } +#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */ } - pcre_free_study(extra); - pcre_free(re); + if (is_succesful) + succesful++; + +#ifdef SUPPORT_PCRE8 + if (re8) { + pcre_free_study(extra8); + pcre_free(re8); + } +#endif +#ifdef SUPPORT_PCRE16 + if (re16) { + pcre16_free_study(extra16); + pcre_free(re16); + } +#endif /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */ printf("."); fflush(stdout); current++; - succesful++; } if (total == succesful) { @@ -717,4 +949,5 @@ static int regression_tests(void) } } + /* End of pcre_jit_test.c */ |