diff options
-rw-r--r-- | src/pcre2_match.c | 4 | ||||
-rw-r--r-- | testdata/testinput10 | 6 | ||||
-rw-r--r-- | testdata/testoutput10 | 10 |
3 files changed, 20 insertions, 0 deletions
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index cf3032e..48e7b9d 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6184,6 +6184,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
 }
 match_data->subject = NULL;
 
+/* Zero the error offset in case the first code unit is invalid UTF. */
+
+match_data->startchar = 0;
+
 /* ============================= JIT matching ============================== */
 
 
diff --git a/testdata/testinput10 b/testdata/testinput10
index dfcb145..3813709 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -579,4 +579,10 @@
 
 /(?:\x{ff}|\x{3000})/I,utf
 
+/x/utf
+ abxyz
+ \x80\=startchar
+ abc\x80\=startchar
+ abc\x80\=startchar,offset=3
+
 # End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 4664b5a..775c2ab 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1803,4 +1803,14 @@ Options: utf
 Starting code units: \xc3 \xe3
 Subject length lower bound = 1
 
+/x/utf
+ abxyz
+ 0: x
+ \x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
+ abc\x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
+ abc\x80\=startchar,offset=3
+Error -36 (bad UTF-8 offset)
+
 # End of testinput10