diff options
author | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-09-19 03:49:32 +0000 |
---|---|---|
committer | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2020-09-19 03:49:32 +0000 |
commit | 5002a59a8289027b8a88c4933077a9b66e839d6c (patch) | |
tree | aa163ef791f75cf3a0b8fe84d2e5cf4cf8118b85 | |
parent | c23be766617cbfcb14e56dc5f1f01289077bd125 (diff) | |
download | pcre2-5002a59a8289027b8a88c4933077a9b66e839d6c.tar.gz |
Fixed a bug in character set matching when JIT is enabled.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1273 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | src/pcre2_jit_compile.c | 36 | ||||
-rw-r--r-- | src/pcre2_jit_test.c | 1 |
3 files changed, 32 insertions, 8 deletions
```diff
@@ -73,6 +73,9 @@ first valid fragment of the subject, possibly causing incorrect "no match"
 returns on subsequent fragments. For example, the pattern /A/ failed to match
 the subject \xe5A. Fixes Bugzilla #2642.
 
+14. Fixed a bug in character set matching when JIT is enabled and both unicode
+scripts and unicode classes are present at the same time.
+
 
 Version 10.35 09-May-2020
 ---------------------------
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index edf64d5..04f0278 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -7672,25 +7672,43 @@ if (needstype || needsscript)
       }
 
     cc = ccbegin;
-    }
 
-  if (needschar)
-    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+    if (needstype)
+      {
+      /* TMP2 has already been shifted by 2 */
+      if (!needschar)
+        {
+        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
+        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+
+        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+        }
+      else
+        {
+        OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
+        OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 
-  if (needstype)
+        OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+        OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+        typereg = RETURN_ADDR;
+        }
+      }
+    else if (needschar)
+      OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+    }
+  else if (needstype)
     {
+    OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+
     if (!needschar)
       {
-      OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
-      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
       OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
 
       OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
       }
     else
       {
-      OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
-      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
       OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 
       OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
@@ -7698,6 +7716,8 @@ if (needstype || needsscript)
       typereg = RETURN_ADDR;
       }
     }
+  else if (needschar)
+    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
   }
 #endif /* SUPPORT_UNICODE */
 
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b7856ad..d935887 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -409,6 +409,7 @@ static struct regression_test_case regression_test_cases[] = {
 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
+	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
 
 	/* Possible empty brackets. */
 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
```