summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2019-10-16 17:12:13 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2019-10-16 17:12:13 +0000
commit: aeb1ba019ce21ada87fd59613743d0355c3fc85f (patch)
tree: 5a2ba14c4988e0b599b73a625cba5617b10b015a
parent: b89f537ffecd65dd04544d2a68620917a7b925e7 (diff)
download: pcre2-aeb1ba019ce21ada87fd59613743d0355c3fc85f.tar.gz
Fix error offset bug introduced at 1176.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- src/pcre2_match.c 4
-rw-r--r-- testdata/testinput10 6
-rw-r--r-- testdata/testoutput10 10
3 files changed, 20 insertions, 0 deletions
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index cf3032e..48e7b9d 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6184,6 +6184,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
}
match_data->subject = NULL;
+/* Zero the error offset in case the first code unit is invalid UTF. */
+
+match_data->startchar = 0;
+
/* ============================= JIT matching ============================== */
diff --git a/testdata/testinput10 b/testdata/testinput10
index dfcb145..3813709 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -579,4 +579,10 @@
/(?:\x{ff}|\x{3000})/I,utf
+/x/utf
+ abxyz
+ \x80\=startchar
+ abc\x80\=startchar
+ abc\x80\=startchar,offset=3
+
# End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 4664b5a..775c2ab 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1803,4 +1803,14 @@ Options: utf
Starting code units: \xc3 \xe3
Subject length lower bound = 1
+/x/utf
+ abxyz
+ 0: x
+ \x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
+ abc\x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
+ abc\x80\=startchar,offset=3
+Error -36 (bad UTF-8 offset)
+
# End of testinput10