summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-06 11:33:41 +0000
committerzherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15>2011-12-06 11:33:41 +0000
commitb4a0233a732c67c98886725229df86fc150b0e82 (patch)
treee0a12eaa6f6f8aedd3f5b76969cb0a12a78f1341
parenta9839b968cee5828bf35dbcb05a31859a49ab7a2 (diff)
downloadpcre-b4a0233a732c67c98886725229df86fc150b0e82.tar.gz
Updating pcre_jit_test. Most of the JIT tests are working now in 16 bit mode.
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@786 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--Makefile.am1
-rw-r--r--pcre.h.in2
-rw-r--r--pcre16_config.c45
-rw-r--r--pcre_compile.c27
-rw-r--r--pcre_config.c15
-rw-r--r--pcre_dfa_exec.c4
-rw-r--r--pcre_exec.c4
-rw-r--r--pcre_internal.h14
-rw-r--r--pcre_jit_compile.c109
-rw-r--r--pcre_jit_test.c417
10 files changed, 471 insertions, 167 deletions
diff --git a/Makefile.am b/Makefile.am
index 817b01a..b64ccd5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -211,6 +211,7 @@ lib_LTLIBRARIES += libpcre16.la
libpcre16_la_SOURCES = \
pcre16_chartables.c \
pcre16_compile.c \
+ pcre16_config.c \
pcre16_exec.c \
pcre16_fullinfo.c \
pcre16_info.c \
diff --git a/pcre.h.in b/pcre.h.in
index b9ec777..ea11766 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -234,6 +234,7 @@ compatible. */
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
#define PCRE_CONFIG_JIT 9
+#define PCRE_CONFIG_UTF16 10
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
@@ -353,6 +354,7 @@ PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
PCRE_EXP_DECL pcre *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
+PCRE_EXP_DECL int pcre16_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
diff --git a/pcre16_config.c b/pcre16_config.c
new file mode 100644
index 0000000..826b100
--- /dev/null
+++ b/pcre16_config.c
@@ -0,0 +1,45 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Copyright (c) 1997-2011 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* Generate code with 16 bit character support. */
+#define COMPILE_PCRE16
+
+#include "pcre_config.c"
+
+/* End of pcre16_config.c */
diff --git a/pcre_compile.c b/pcre_compile.c
index da22f59..bdfac5b 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3738,8 +3738,8 @@ for (;; ptr++)
{
const pcre_uchar *oldptr;
-#ifdef SUPPORT_UTF8
- if (utf && c > 127)
+#ifdef SUPPORT_UTF
+ if (utf && HAS_EXTRALEN(c))
{ /* Braces are required because the */
GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */
}
@@ -4317,11 +4317,10 @@ for (;; ptr++)
#ifdef SUPPORT_UTF
if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
-#endif
-#ifndef COMPILE_PCRE8
+#elif !(defined COMPILE_PCRE8)
if (c > 255)
#endif
-#if defined SUPPORT_UTF || defined COMPILE_PCRE16
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
{
xclass = TRUE;
*class_uchardata++ = XCL_SINGLE;
@@ -4345,8 +4344,7 @@ for (;; ptr++)
}
else
-#endif /* SUPPORT_UTF8 */
-
+#endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Handle a single-byte character */
{
classbits[c/8] |= (1 << (c&7));
@@ -4358,6 +4356,7 @@ for (;; ptr++)
class_charcount++;
class_lastchar = c;
}
+
}
/* Loop until ']' reached. This "while" is the end of the "do" far above.
@@ -5849,7 +5848,7 @@ for (;; ptr++)
for (i = 0; i < cd->names_found; i++)
{
- int crc = memcmp(name, slot+2, namelen);
+ int crc = memcmp(name, slot+2, IN_UCHARS(namelen));
if (crc == 0)
{
if (slot[2+namelen] == 0)
@@ -7440,7 +7439,7 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
int newnl = 0;
int newbsr = 0;
- if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0)
+ if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
{ skipatstart += 7; options |= PCRE_UTF8; continue; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
{ skipatstart += 6; options |= PCRE_UCP; continue; }
@@ -7805,8 +7804,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
if (cd->fcc[re->first_char] != re->first_char)
re->flags |= PCRE_FCH_CASELESS;
}
- else if ((options & PCRE_UCP) != 0
- && UCD_OTHERCASE(re->first_char) != re->first_char)
+ else if (UCD_OTHERCASE(re->first_char) != re->first_char)
re->flags |= PCRE_FCH_CASELESS;
}
else
@@ -7843,13 +7841,12 @@ if (reqchar >= 0 &&
/* We ignore non-ASCII first chars in 8 bit mode. */
if (utf)
{
- if (re->first_char < 128)
+ if (re->req_char < 128)
{
- if (cd->fcc[re->first_char] != re->first_char)
+ if (cd->fcc[re->req_char] != re->req_char)
re->flags |= PCRE_RCH_CASELESS;
}
- else if ((options & PCRE_UCP) != 0
- && UCD_OTHERCASE(re->first_char) != re->first_char)
+ else if (UCD_OTHERCASE(re->req_char) != re->req_char)
re->flags |= PCRE_RCH_CASELESS;
}
else
diff --git a/pcre_config.c b/pcre_config.c
index bf42c02..a7792f7 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -62,13 +62,26 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_config(int what, void *where)
+#endif
{
switch (what)
{
case PCRE_CONFIG_UTF8:
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF8 && defined COMPILE_PCRE8
+ *((int *)where) = 1;
+#else
+ *((int *)where) = 0;
+#endif
+ break;
+
+ case PCRE_CONFIG_UTF16:
+#if defined SUPPORT_UTF16 && defined COMPILE_PCRE16
*((int *)where) = 1;
#else
*((int *)where) = 0;
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 1bc96c1..7cceaae 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -3202,7 +3202,7 @@ if (!anchored)
{
first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (first_char > 127 && utf && md->use_ucp)
+ if (utf && first_char > 127)
first_char2 = UCD_OTHERCASE(first_char);
#endif
}
@@ -3226,7 +3226,7 @@ if ((re->flags & PCRE_REQCHSET) != 0)
{
req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (req_char > 127 && utf && md->use_ucp)
+ if (utf && req_char > 127)
req_char2 = UCD_OTHERCASE(req_char);
#endif
}
diff --git a/pcre_exec.c b/pcre_exec.c
index bb1b60a..5f0a156 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6267,7 +6267,7 @@ if (!anchored)
{
first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (first_char > 127 && utf && md->use_ucp)
+ if (utf && first_char > 127)
first_char2 = UCD_OTHERCASE(first_char);
#endif
}
@@ -6289,7 +6289,7 @@ if ((re->flags & PCRE_REQCHSET) != 0)
{
req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (req_char > 127 && utf && md->use_ucp)
+ if (utf && req_char > 127)
req_char2 = UCD_OTHERCASE(req_char);
#endif
}
diff --git a/pcre_internal.h b/pcre_internal.h
index 4046e41..b93101f 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1166,7 +1166,12 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR "UTF8)"
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR "UTF8)"
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR "UTF16)"
+#endif
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
@@ -1421,7 +1426,12 @@ only. */
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
+#endif
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index df158be..3b85b85 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -297,7 +297,7 @@ typedef struct compiler_common {
jump_list *casefulcmp;
jump_list *caselesscmp;
BOOL jscript_compat;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
BOOL utf;
#ifdef SUPPORT_UCP
BOOL use_ucp;
@@ -306,7 +306,7 @@ typedef struct compiler_common {
#ifdef COMPILE_PCRE8
jump_list *utfreadtype8;
#endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
jump_list *getucd;
#endif
@@ -500,7 +500,7 @@ switch(*cc)
return cc + 1;
case OP_ANYBYTE:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf) return NULL;
#endif
return cc + 1;
@@ -576,6 +576,8 @@ switch(*cc)
case OP_NOTPROP:
case OP_PROP:
+ return cc + 1 + 2;
+
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEEXACT:
@@ -1267,7 +1269,7 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar*
/* Detects if the character has an othercase. */
unsigned int c;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
GETCHAR(c, cc);
@@ -1279,6 +1281,9 @@ if (common->utf)
return FALSE;
#endif
}
+#ifndef COMPILE_PCRE8
+ return common->fcc[c] != c;
+#endif
}
else
#endif
@@ -1769,6 +1774,9 @@ if (newlinecheck)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
+#ifdef COMPILE_PCRE16
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
nl = JUMP(SLJIT_JUMP);
}
@@ -1776,7 +1784,7 @@ if (newlinecheck)
mainloop = LABEL();
/* Increasing the STR_PTR here requires one less jump in the most common case. */
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;
@@ -1843,7 +1851,7 @@ if (caseless)
{
oc = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (first_char > 127 && common->utf && common->use_ucp)
+ if (first_char > 127 && common->utf)
oc = UCD_OTHERCASE(first_char);
#endif
}
@@ -2077,7 +2085,7 @@ if (caseless)
{
oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
- if (req_char > 127 && common->utf && common->use_ucp)
+ if (req_char > 127 && common->utf)
oc = UCD_OTHERCASE(req_char);
#endif
}
@@ -2265,7 +2273,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2289,7 +2297,7 @@ COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
@@ -2323,7 +2331,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
@@ -2415,8 +2423,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
#undef CHAR1
#undef CHAR2
-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
@@ -2436,8 +2443,7 @@ while (src1 < end1)
return src2;
}
-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
compare_context* context, jump_list **fallbacks)
@@ -2445,7 +2451,7 @@ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless,
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
int utflength;
#endif
@@ -2588,7 +2594,7 @@ do
#endif
cc++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
utflength--;
}
while (utflength > 0);
@@ -2646,7 +2652,7 @@ if ((*cc++ & XCL_MAP) != 0)
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#ifndef COMPILE_PCRE8
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
if (common->utf)
jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
@@ -2660,7 +2666,7 @@ if ((*cc++ & XCL_MAP) != 0)
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
-#elif defined SUPPORT_UTF8
+#elif defined SUPPORT_UTF
if (common->utf)
JUMPHERE(jump);
#endif
@@ -2795,7 +2801,7 @@ while (*cc != XCL_END)
if (*cc == XCL_SINGLE)
{
cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
GETCHARINC(c, cc);
@@ -2826,7 +2832,7 @@ while (*cc != XCL_END)
else if (*cc == XCL_RANGE)
{
cc ++;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
GETCHARINC(c, cc);
@@ -2835,7 +2841,7 @@ while (*cc != XCL_END)
#endif
c = *cc++;
SET_CHAR_OFFSET(c);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
GETCHARINC(c, cc);
@@ -2963,7 +2969,7 @@ int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
@@ -3063,7 +3069,7 @@ switch(type)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
return cc;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
case OP_NOTPROP:
case OP_PROP:
@@ -3279,7 +3285,7 @@ switch(type)
}
check_input_end(common, fallbacks);
read_char(common);
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
GETCHAR(c, cc);
@@ -3296,16 +3302,14 @@ switch(type)
case OP_NOT:
case OP_NOTI:
+ check_input_end(common, fallbacks);
+ length = 1;
#ifdef SUPPORT_UTF
if (common->utf)
{
- length = 1;
- if (HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
-
- check_input_end(common, fallbacks);
- GETCHAR(c, cc);
-
- if (c <= 127)
+#ifdef COMPILE_PCRE8
+ c = *cc;
+ if (c < 128)
{
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
if (type == OP_NOT || !char_has_othercase(common, cc))
@@ -3317,24 +3321,24 @@ switch(type)
add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
}
/* Skip the variable-length character. */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
-#ifdef COMPILE_PCRE8
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
-#endif
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
JUMPHERE(jump[0]);
- return cc + length;
+ return cc + 1;
}
else
+#endif /* COMPILE_PCRE8 */
+ {
+ GETCHARLEN(c, cc, length);
read_char(common);
+ }
}
else
-#endif
+#endif /* SUPPORT_UTF */
{
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+ read_char(common);
c = *cc;
}
@@ -3363,10 +3367,11 @@ switch(type)
read_char(common);
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
jump[0] = NULL;
-#ifdef SUPPORT_UTF8
- /* This check can only be skipped in pure 8 bit mode. */
+#ifdef COMPILE_PCRE8
+ /* This check only affects 8 bit mode. In other modes, we
+ always need to compare the value with 255. */
if (common->utf)
-#endif
+#endif /* COMPILE_PCRE8 */
{
jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
if (type == OP_CLASS)
@@ -3375,7 +3380,7 @@ switch(type)
jump[0] = NULL;
}
}
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
@@ -3385,7 +3390,7 @@ switch(type)
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (jump[0] != NULL)
JUMPHERE(jump[0]);
-#endif
+#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
return cc + 32 / sizeof(pcre_uchar);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16
@@ -3399,7 +3404,7 @@ switch(type)
SLJIT_ASSERT(length > 0);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
if (common->utf)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
@@ -3411,7 +3416,7 @@ switch(type)
return cc + LINK_SIZE;
}
#endif
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
return cc + LINK_SIZE;
}
@@ -3548,8 +3553,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
if (withchecks && !common->jscript_compat)
add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
-#ifdef SUPPORT_UTF8
-#ifdef SUPPORT_UCP
+#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && *cc == OP_REFI)
{
SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
@@ -3567,8 +3571,7 @@ if (common->utf && *cc == OP_REFI)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
}
else
-#endif
-#endif
+#endif /* SUPPORT_UTF && SUPPORT_UCP */
{
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
if (withchecks)
@@ -6422,7 +6425,7 @@ common->vspace = NULL;
common->casefulcmp = NULL;
common->caselesscmp = NULL;
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
common->utf = (re->options & PCRE_UTF8) != 0;
#ifdef SUPPORT_UCP
@@ -6432,7 +6435,7 @@ common->utfreadchar = NULL;
#ifdef COMPILE_PCRE8
common->utfreadtype8 = NULL;
#endif
-#endif /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
common->getucd = NULL;
#endif
diff --git a/pcre_jit_test.c b/pcre_jit_test.c
index 8a03272..e4d2432 100644
--- a/pcre_jit_test.c
+++ b/pcre_jit_test.c
@@ -51,18 +51,35 @@ POSSIBILITY OF SUCH DAMAGE.
#define PCRE_BUG 0x80000000
/*
- Hungarian utf8 characters
- \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
- \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
- \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
- \xc2\x85 = 0x85 (NExt Line = NEL)
- \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
- \xe2\x80\xa8 = 0x2028 (Line Separator)
- \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
- \xcc\x8d = 781 (Something with Mark property)
+ Letter characters:
+ \xe6\x92\xad = 0x64ad = 25773 (kanji)
+ Non-letter characters:
+ \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
+ \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
+ Newlines:
+ \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
+ \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
+ Othercase pairs:
+ \xc3\xa9 = 0xe9 = 233 (e')
+ \xc3\x89 = 0xc9 = 201 (E')
+ \xc3\xa1 = 0xe1 = 225 (a')
+ \xc3\x81 = 0xc1 = 193 (A')
+ \xc8\xba = 0x23a = 570
+ \xe2\xb1\xa5 = 0x2c65 = 11365
+ \xe1\xbd\xb8 = 0x1f78 = 8056
+ \xe1\xbf\xb8 = 0x1ff8 = 8184
+ \xf0\x90\x90\x80 = 0x10400 = 66560
+ \xf0\x90\x90\xa8 = 0x10428 = 66600
+ Mark property:
+ \xcc\x8d = 0x30d = 781
+ Special:
+ \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
+ \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
+ \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
+ \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
+ \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
*/
-static void setstack(pcre_extra *extra);
static int regression_tests(void);
int main(void)
@@ -76,21 +93,12 @@ int main(void)
return regression_tests();
}
-static pcre_jit_stack* callback(void *arg)
-{
- return (pcre_jit_stack *)arg;
-}
-
-static void setstack(pcre_extra *extra)
-{
- static pcre_jit_stack *stack;
- if (stack) pcre_jit_stack_free(stack);
- stack = pcre_jit_stack_alloc(1, 1024 * 1024);
- pcre_assign_jit_stack(extra, callback, stack);
-}
-
/* --------------------------------------------------------------------------------------- */
+#if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
+#error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
+#endif
+
#define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
@@ -139,6 +147,10 @@ static struct regression_test_case regression_test_cases[] = {
{ CMA, 0, "\\Ca", "CDA" },
{ MA, 0, "\\Cx", "cda" },
{ CMA, 0, "\\Cx", "CDA" },
+ { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+ { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+ { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
+ { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
/* Assertions. */
{ MUA, 0, "\\b[^A]", "A_B#" },
@@ -151,6 +163,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
{ MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
{ MUA, 0, "\\b.", "\xcd\xbe" },
+ { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
{ MA, 0, "\\R^", "\n" },
{ MA, 1, "^", "\n" },
{ 0, 0, "^ab", "ab" },
@@ -267,6 +280,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MUA, 0, "\\b\\w+\\B", "x,a_cd" },
{ MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
{ CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
+ { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
/* Basic character sets. */
{ MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
@@ -449,6 +463,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
{ MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
{ PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
+ { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
/* Assertions. */
{ MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
@@ -601,111 +616,328 @@ static struct regression_test_case regression_test_cases[] = {
{ 0, 0, NULL, NULL }
};
+pcre_jit_stack* callback(void *arg) /* JIT stack callback: hands back its opaque argument as the stack to use. */
+{
+ return (pcre_jit_stack *)arg; /* setstack() passes the stack pointer itself as the callback argument */
+}
+
+static void setstack(pcre_extra *extra, int realloc) /* Assign the shared JIT stack to extra; a non-zero realloc replaces that stack first. */
+{
+ static pcre_jit_stack *stack; /* one stack kept across calls; NULL until the first reallocation */
+
+ if (realloc) {
+ if (stack)
+ pcre_jit_stack_free(stack); /* release the previous stack before allocating its replacement */
+ stack = pcre_jit_stack_alloc(1, 1024 * 1024); /* presumably start size 1 and maximum size 1 MiB -- confirm against the JIT API */
+ }
+ /* Extra can be NULL. */
+ pcre_assign_jit_stack(extra, callback, stack); /* callback() returns this same stack pointer */
+}
+
+#ifdef SUPPORT_PCRE16
+
+static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length) /* Decode NUL-terminated UTF-8 input into 16-bit units in output (capacity max_length, terminator included); returns the number of units written, not counting the terminator. offsetmap may be NULL. */
+{
+ unsigned char *ptr = (unsigned char*)input;
+ PCRE_SCHAR16 *optr = output;
+ unsigned int c;
+
+ if (max_length == 0) /* no room even for the terminator: nothing is written */
+ return 0;
+
+ while (*ptr && max_length > 1) { /* always keep one unit free for the terminator */
+ c = 0;
+ if (offsetmap)
+ *offsetmap++ = (int)(ptr - (unsigned char*)input); /* byte offset in input where this character starts */
+
+ if (!(*ptr & 0x80)) /* top bit clear: single byte */
+ c = *ptr++;
+ else if (!(*ptr & 0x20)) { /* handled as a two-byte sequence; following bytes are not validated */
+ c = ((ptr[0] & 0x1f) << 6) | (ptr[1] & 0x3f);
+ ptr += 2;
+ } else if (!(*ptr & 0x10)) { /* three-byte sequence */
+ c = ((ptr[0] & 0x0f) << 12) | ((ptr[1] & 0x3f) << 6) | (ptr[2] & 0x3f);
+ ptr += 3;
+ } else if (!(*ptr & 0x08)) { /* four-byte sequence */
+ c = ((ptr[0] & 0x07) << 18) | ((ptr[1] & 0x3f) << 12) | ((ptr[2] & 0x3f) << 6) | (ptr[3] & 0x3f);
+ ptr += 4;
+ } /* NOTE(review): a byte with bits 0x80, 0x20, 0x10 and 0x08 all set matches no branch, so c stays 0 and ptr is not advanced */
+
+ if (c < 65536) { /* fits in a single 16-bit unit */
+ *optr++ = c;
+ max_length--;
+ } else if (max_length <= 2) { /* no room for two units plus the terminator: stop early */
+ *optr = '\0';
+ return optr - output;
+ } else {
+ c -= 0x10000;
+ *optr++ = 0xd800 | ((c >> 10) & 0x3ff); /* first unit of the surrogate pair (upper 10 bits) */
+ *optr++ = 0xdc00 | (c & 0x3ff); /* second unit of the surrogate pair (lower 10 bits) */
+ max_length -= 2;
+ if (offsetmap)
+ offsetmap++; /* skip the map slot of the second unit; it is left unwritten */
+ }
+ }
+ if (offsetmap)
+ *offsetmap = (int)(ptr - (unsigned char*)input); /* final entry: byte offset where decoding stopped */
+ *optr = '\0';
+ return optr - output;
+}
+
+static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length) /* Widen NUL-terminated 8-bit input into 16-bit units in output (capacity max_length, terminator included); returns the number of units written, not counting the terminator. */
+{
+ PCRE_SCHAR16 *optr = output;
+
+ if (max_length == 0) /* no room even for the terminator: nothing is written */
+ return 0;
+
+ while (*input && max_length > 1) { /* always keep one unit free for the terminator */
+ *optr++ = (unsigned char)*input++; /* convert through unsigned char so bytes >= 0x80 map to 0x80..0xff instead of being sign-extended where plain char is signed */
+ max_length--;
+ }
+ *optr = '\0';
+ return optr - output;
+}
+
+#define REGTEST_MAX_LENGTH 4096
+static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
+static int regtest_offsetmap[REGTEST_MAX_LENGTH];
+
+#endif /* SUPPORT_PCRE16 */
+
static int regression_tests(void)
{
- pcre *re;
struct regression_test_case *current = regression_test_cases;
const char *error;
- pcre_extra *extra;
- int utf8 = 0, ucp = 0;
- int ovector1[32];
- int ovector2[32];
- int return_value1, return_value2;
- int i, err_offs;
- int total = 0, succesful = 0;
+ int i, err_offs, is_succesful;
+ int total = 0;
+ int succesful = 0;
int counter = 0;
- int disabled_flags = PCRE_BUG;
+#ifdef SUPPORT_PCRE8
+ pcre *re8;
+ pcre_extra *extra8;
+ int ovector8_1[32];
+ int ovector8_2[32];
+ int return_value8_1, return_value8_2;
+ int utf8 = 0, ucp8 = 0;
+ int disabled_flags8 = PCRE_BUG;
+#endif
+#ifdef SUPPORT_PCRE16
+ pcre *re16;
+ pcre_extra *extra16;
+ int ovector16_1[32];
+ int ovector16_2[32];
+ int return_value16_1, return_value16_2;
+ int utf16 = 0, ucp16 = 0;
+ int disabled_flags16 = PCRE_BUG;
+ int length16;
+#endif
/* This test compares the behaviour of interpreter and JIT. Although disabling
- utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
+ utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
still considered successful from pcre_jit_test point of view. */
+ printf("Running JIT regression\n");
+
+#ifdef SUPPORT_PCRE8
pcre_config(PCRE_CONFIG_UTF8, &utf8);
- pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
+ pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
if (!utf8)
- disabled_flags |= PCRE_UTF8;
- if (!ucp)
- disabled_flags |= PCRE_UCP;
+ disabled_flags8 |= PCRE_UTF8;
+ if (!ucp8)
+ disabled_flags8 |= PCRE_UCP;
+ printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
+#endif
+#ifdef SUPPORT_PCRE16
+ pcre16_config(PCRE_CONFIG_UTF16, &utf16);
+ pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
+ if (!utf16)
+ disabled_flags16 |= PCRE_UTF8;
+ if (!ucp16)
+ disabled_flags16 |= PCRE_UCP;
+ printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
+#endif
- printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
while (current->pattern) {
/* printf("\nPattern: %s :\n", current->pattern); */
total++;
error = NULL;
- re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
-
- if (!re) {
- if (utf8 && ucp)
- printf("\nCannot compile pattern: %s\n", current->pattern);
- else {
- /* Some patterns cannot be compiled when either of utf8
- or ucp is disabled. We just skip them. */
- printf(".");
- succesful++;
+#ifdef SUPPORT_PCRE8
+ re8 = pcre_compile(current->pattern,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
+ &error, &err_offs, NULL);
+
+ if (re8) {
+ error = NULL;
+ extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
+ if (!extra8) {
+ printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
+ pcre_free(re8);
+ re8 = NULL;
}
- current++;
- continue;
- }
-
- error = NULL;
- extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
- if (!extra) {
- printf("\nCannot study pattern: %s\n", current->pattern);
- current++;
- continue;
- }
-
- if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
- printf("\nJIT compiler does not support: %s\n", current->pattern);
- current++;
- continue;
- }
+ if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
+ printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
+ pcre_free_study(extra8);
+ pcre_free(re8);
+ re8 = NULL;
+ }
+ } else if (utf8 && ucp8)
+ printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif
+#ifdef SUPPORT_PCRE16
+ convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
+ re16 = pcre16_compile(regtest_buf,
+ current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
+ &error, &err_offs, NULL);
+ if (re16) {
+ error = NULL;
+ extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
+ if (!extra16) {
+ printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
+ pcre_free(re16);
+ re16 = NULL;
+ }
+ if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
+ printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
+ pcre_free_study(extra16);
+ pcre_free(re16);
+ re16 = NULL;
+ }
+ } else if (utf16 && ucp16)
+ printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
+#endif
counter++;
if ((counter & 0x3) != 0)
- setstack(extra);
-
- for (i = 0; i < 32; ++i)
- ovector1[i] = -2;
- return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
+ setstack(NULL, 1);
+
+#ifdef SUPPORT_PCRE8
+ if (re8) {
+ setstack(extra8, 0);
+ for (i = 0; i < 32; ++i)
+ ovector8_1[i] = -2;
+ return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
+
+ for (i = 0; i < 32; ++i)
+ ovector8_2[i] = -2;
+ return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
+ }
+#endif
- for (i = 0; i < 32; ++i)
- ovector2[i] = -2;
- return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
+#ifdef SUPPORT_PCRE16
+ if (re16) {
+ setstack(extra16, 0);
+ if (current->flags & PCRE_UTF8)
+ length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
+ else
+ length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
+
+ for (i = 0; i < 32; ++i)
+ ovector16_1[i] = -2;
+ return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
+
+ for (i = 0; i < 32; ++i)
+ ovector16_2[i] = -2;
+ return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset,
+ current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
+ }
+#endif
/* If PCRE_BUG is set, just run the test, but do not compare the results.
Segfaults can still be captured. */
- if (!(current->flags & PCRE_BUG)) {
- if (return_value1 != return_value2) {
- printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
- current++;
- continue;
- }
- if (return_value1 >= 0) {
- return_value1 *= 2;
- err_offs = 0;
- for (i = 0; i < return_value1; ++i)
- if (ovector1[i] != ovector2[i]) {
- printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
- err_offs = 1;
+ is_succesful = 1;
+ if (!(current->flags & PCRE_BUG)) {
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
+ if ((current->flags & PCRE_UTF8) && utf8 && utf16) {
+ /* All results must be the same. */
+ if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
+ printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
+ return_value8_1, return_value8_2, return_value16_1, return_value16_2,
+ total, current->pattern, current->input);
+ is_succesful = 0;
+ } else if (return_value8_1 >= 0) {
+ return_value8_1 *= 2;
+ /* Transform back the results. */
+ for (i = 0; i < return_value8_1; ++i) {
+ if (ovector16_1[i] >= 0)
+ ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
+ if (ovector16_2[i] >= 0)
+ ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
}
- if (err_offs) {
- current++;
- continue;
+
+ for (i = 0; i < return_value8_1; ++i)
+ if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
+ printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
+ i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
+ total, current->pattern, current->input);
+ is_succesful = 0;
+ }
+ }
+ } else {
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
+ /* The 8 bit and 16 bit results are not compared with each other here: within each mode the JIT and interpreter results must be equal. */
+#ifdef SUPPORT_PCRE8
+ if (return_value8_1 != return_value8_2) {
+ printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ return_value8_1, return_value8_2, total, current->pattern, current->input);
+ is_succesful = 0;
+ } else if (return_value8_1 >= 0) {
+ return_value8_1 *= 2;
+ for (i = 0; i < return_value8_1; ++i)
+ if (ovector8_1[i] != ovector8_2[i]) {
+ printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+ i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
+ is_succesful = 0;
+ }
}
+#endif
+
+#ifdef SUPPORT_PCRE16
+ if (return_value16_1 != return_value16_2) {
+ printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
+ return_value16_1, return_value16_2, total, current->pattern, current->input);
+ is_succesful = 0;
+ } else if (return_value16_1 >= 0) {
+ return_value16_1 *= 2;
+ for (i = 0; i < return_value16_1; ++i)
+ if (ovector16_1[i] != ovector16_2[i]) {
+ printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s' \n",
+ i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
+ is_succesful = 0;
+ }
+ }
+#endif
+
+#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
}
+#endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
}
- pcre_free_study(extra);
- pcre_free(re);
+ if (is_succesful)
+ succesful++;
+
+#ifdef SUPPORT_PCRE8
+ if (re8) {
+ pcre_free_study(extra8);
+ pcre_free(re8);
+ }
+#endif
+#ifdef SUPPORT_PCRE16
+ if (re16) {
+ pcre16_free_study(extra16);
+ pcre_free(re16);
+ }
+#endif
/* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
printf(".");
fflush(stdout);
current++;
- succesful++;
}
if (total == succesful) {
@@ -717,4 +949,5 @@ static int regression_tests(void)
}
}
+
/* End of pcre_jit_test.c */