From aeb1ba019ce21ada87fd59613743d0355c3fc85f Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 16 Oct 2019 17:12:13 +0000 Subject: Fix error offset bug introduced at 1176. git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069 --- src/pcre2_match.c | 4 ++++ testdata/testinput10 | 6 ++++++ testdata/testoutput10 | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/src/pcre2_match.c b/src/pcre2_match.c index cf3032e..48e7b9d 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -6184,6 +6184,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) } match_data->subject = NULL; +/* Zero the error offset in case the first code unit is invalid UTF. */ + +match_data->startchar = 0; + /* ============================= JIT matching ============================== */ diff --git a/testdata/testinput10 b/testdata/testinput10 index dfcb145..3813709 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -579,4 +579,10 @@ /(?:\x{ff}|\x{3000})/I,utf +/x/utf + abxyz + \x80\=startchar + abc\x80\=startchar + abc\x80\=startchar,offset=3 + # End of testinput10 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 4664b5a..775c2ab 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1803,4 +1803,14 @@ Options: utf Starting code units: \xc3 \xe3 Subject length lower bound = 1 +/x/utf + abxyz + 0: x + \x80\=startchar +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0 + abc\x80\=startchar +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3 + abc\x80\=startchar,offset=3 +Error -36 (bad UTF-8 offset) + # End of testinput10 -- cgit v1.2.1