summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2020-09-15 14:36:23 +0000
committerph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2020-09-15 14:36:23 +0000
commitc23be766617cbfcb14e56dc5f1f01289077bd125 (patch)
tree4eb913409fb7da41356a1edaf016e9ea46812114
parent075c6e67db068685a87233580579fccecfb7f27a (diff)
downloadpcre2-c23be766617cbfcb14e56dc5f1f01289077bd125.tar.gz
Fix Bugzilla #2642: no match bug in 8-bit mode for caseless invalid utf
matching. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1272 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--ChangeLog7
-rw-r--r--src/pcre2_match.c10
-rw-r--r--testdata/testinput103
-rw-r--r--testdata/testoutput104
4 files changed, 22 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index baf2b8c..847476c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -66,6 +66,13 @@ this case have been moved from test 1 to test 2.
12. Further to 10 above, pcre2test has been updated to detect and grumble if a
delimiter other than / is used after #perltest.
+13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
+was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
+the start of a match was not resetting correctly after a failed match on the
+first valid fragment of the subject, possibly causing incorrect "no match"
+returns on subsequent fragments. For example, the pattern /A/ failed to match
+the subject \xe5A. Fixes Bugzilla #2642.
+
Version 10.35 09-May-2020
---------------------------
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 11289d5..3372410 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6115,8 +6115,8 @@ BOOL has_req_cu = FALSE;
BOOL startline;
#if PCRE2_CODE_UNIT_WIDTH == 8
-BOOL memchr_not_found_first_cu = FALSE;
-BOOL memchr_not_found_first_cu2 = FALSE;
+BOOL memchr_not_found_first_cu;
+BOOL memchr_not_found_first_cu2;
#endif
PCRE2_UCHAR first_cu = 0;
@@ -6709,6 +6709,11 @@ FRAGMENT_RESTART:
start_partial = match_partial = NULL;
mb->hitend = FALSE;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+memchr_not_found_first_cu = FALSE;
+memchr_not_found_first_cu2 = FALSE;
+#endif
+
for(;;)
{
PCRE2_SPTR new_start_match;
@@ -7187,6 +7192,7 @@ if (utf && end_subject != true_end_subject &&
starting code units in 8-bit and 16-bit modes. */
start_match = end_subject + 1;
+
#if PCRE2_CODE_UNIT_WIDTH != 32
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
start_match++;
diff --git a/testdata/testinput10 b/testdata/testinput10
index b3c3197..efd3298 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -610,4 +610,7 @@
/X(\x{e1})Y/replace=>\U$1<,substitute_extended
X\x{e1}Y
+/A/utf,match_invalid_utf,caseless
+ \xe5A
+
# End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 59af535..2a3803f 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1871,4 +1871,8 @@ Subject length lower bound = 1
X\x{e1}Y
1: >\xe1<
+/A/utf,match_invalid_utf,caseless
+ \xe5A
+ 0: A
+
# End of testinput10