summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> 2020-02-26 10:18:43 +0000
committer zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> 2020-02-26 10:18:43 +0000
commit dd37c27fef3592a02fef0a8d9b98a78268717fe9 (patch)
tree 0a9ea0ebdc1d62a37dd393783e0377ef387a7373
parent f82582217bf85600b850b862148b8cb08bb7a2e5 (diff)
download pcre2-dd37c27fef3592a02fef0a8d9b98a78268717fe9.tar.gz
Follow ucp changes in JIT.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1230 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- src/pcre2_jit_compile.c 54
-rw-r--r-- testdata/testinput10 4
-rw-r--r-- testdata/testinput12 4
-rw-r--r-- testdata/testoutput10 4
-rw-r--r-- testdata/testoutput12-16 4
-rw-r--r-- testdata/testoutput12-32 4
6 files changed, 32 insertions, 42 deletions
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 8aa57dd..2de5538 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -476,7 +476,7 @@ typedef struct compiler_common {
#ifdef SUPPORT_UNICODE
BOOL utf;
BOOL invalid_utf;
- BOOL use_ucp;
+ BOOL ucp;
/* Points to saving area for iref. */
sljit_s32 iref_ptr;
jump_list *getucd;
@@ -3226,16 +3226,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR
unsigned int c;
#ifdef SUPPORT_UNICODE
-if (common->utf)
+if (common->utf || common->ucp)
{
- GETCHAR(c, cc);
- if (c > 127)
+ if (common->utf)
{
- return c != UCD_OTHERCASE(c);
+ GETCHAR(c, cc);
}
-#if PCRE2_CODE_UNIT_WIDTH != 8
+ else
+ c = *cc;
+
+ if (c > 127)
+ return c != UCD_OTHERCASE(c);
+
return common->fcc[c] != c;
-#endif
}
else
#endif
@@ -3247,10 +3250,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne
{
/* Returns with the othercase. */
#ifdef SUPPORT_UNICODE
-if (common->utf && c > 127)
- {
+if ((common->utf || common->ucp) && c > 127)
return UCD_OTHERCASE(c);
- }
#endif
return TABLE_GET(c, common->fcc, c);
}
@@ -3264,15 +3265,19 @@ int n;
#endif
#ifdef SUPPORT_UNICODE
-if (common->utf)
+if (common->utf || common->ucp)
{
- GETCHAR(c, cc);
+ if (common->utf)
+ {
+ GETCHAR(c, cc);
+ }
+ else
+ c = *cc;
+
if (c <= 127)
oc = common->fcc[c];
else
- {
oc = UCD_OTHERCASE(c);
- }
}
else
{
@@ -5493,7 +5498,12 @@ while (TRUE)
#endif
{
chr = *cc;
- othercase[0] = TABLE_GET(chr, common->fcc, chr);
+#ifdef SUPPORT_UNICODE
+ if (common->ucp && chr > 127)
+ othercase[0] = UCD_OTHERCASE(chr);
+ else
+#endif
+ othercase[0] = TABLE_GET(chr, common->fcc, chr);
}
}
else
@@ -5922,8 +5932,8 @@ oc = first_char;
if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
{
oc = TABLE_GET(first_char, common->fcc, first_char);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (first_char > 127 && common->utf)
+#if defined SUPPORT_UNICODE
+ if (first_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(first_char);
#endif
}
@@ -6133,8 +6143,8 @@ oc = req_char;
if (caseless)
{
oc = TABLE_GET(req_char, common->fcc, req_char);
-#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
- if (req_char > 127 && common->utf)
+#if defined SUPPORT_UNICODE
+ if (req_char > 127 && (common->utf || common->ucp))
oc = UCD_OTHERCASE(req_char);
#endif
}
@@ -6288,7 +6298,7 @@ else
/* Testing char type. */
#ifdef SUPPORT_UNICODE
-if (common->use_ucp)
+if (common->ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@@ -6334,7 +6344,7 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
valid_utf = LABEL();
-if (common->use_ucp)
+if (common->ucp)
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
@@ -13216,7 +13226,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
#ifdef SUPPORT_UNICODE
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
common->utf = (re->overall_options & PCRE2_UTF) != 0;
-common->use_ucp = (re->overall_options & PCRE2_UCP) != 0;
+common->ucp = (re->overall_options & PCRE2_UCP) != 0;
if (common->utf)
{
if (common->nltype == NLTYPE_ANY)
diff --git a/testdata/testinput10 b/testdata/testinput10
index be6d426..b3c3197 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -587,8 +587,6 @@
abc\x80\=startchar
abc\x80\=startchar,offset=3
-#subject no_jit
-
/\x{c1}+\x{e1}/iIB,ucp
\x{c1}\x{c1}\x{c1}
\x{e1}\x{e1}\x{e1}
@@ -612,6 +610,4 @@
/X(\x{e1})Y/replace=>\U$1<,substitute_extended
X\x{e1}Y
-#subject
-
# End of testinput10
diff --git a/testdata/testinput12 b/testdata/testinput12
index beaf643..fbfacc5 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -481,8 +481,6 @@
/[ab\x{120}]+/iB,ucp
aABb\x{121}\x{120}
-#subject no_jit
-
/\x{c1}/i,no_start_optimize
\= Expect no match
\x{e1}
@@ -532,8 +530,6 @@
/X(\x{121})Y/ucp,replace=>\U$1<,substitute_extended
X\x{121}Y
-#subject
-
# ----------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 9fe5ef6..59af535 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1817,8 +1817,6 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
abc\x80\=startchar,offset=3
Error -36 (bad UTF-8 offset)
-#subject no_jit
-
/\x{c1}+\x{e1}/iIB,ucp
------------------------------------------------------------------
Bra
@@ -1873,6 +1871,4 @@ Subject length lower bound = 1
X\x{e1}Y
1: >\xe1<
-#subject
-
# End of testinput10
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 6e545c3..9689ab1 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1670,8 +1670,6 @@ Subject length lower bound = 1
aABb\x{121}\x{120}
0: aABb\x{121}\x{120}
-#subject no_jit
-
/\x{c1}/i,no_start_optimize
\= Expect no match
\x{e1}
@@ -1763,8 +1761,6 @@ Subject length lower bound = 1
X\x{121}Y
1: >\x{120}<
-#subject
-
# ----------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 1a0783a..c51c517 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1668,8 +1668,6 @@ Subject length lower bound = 1
aABb\x{121}\x{120}
0: aABb\x{121}\x{120}
-#subject no_jit
-
/\x{c1}/i,no_start_optimize
\= Expect no match
\x{e1}
@@ -1761,8 +1759,6 @@ Subject length lower bound = 1
X\x{121}Y
1: >\x{120}<
-#subject
-
# ----------------------------------------------------
# End of testinput12