From cd5591a28d738b1b00c96c0e6cae91b490dba56d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 7 Feb 2020 17:01:39 +0100 Subject: PCRE: Only remember valid UTF-8 if start offset zero PCRE only validates the string starting from the start offset (minus maximum look-behind, but let's ignore that), so we can only remember that the string is fully valid UTF-8 if the original start offset is zero. --- ext/pcre/tests/bug79241.phpt | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'ext/pcre/tests') diff --git a/ext/pcre/tests/bug79241.phpt b/ext/pcre/tests/bug79241.phpt index 92e5253735..f6dbb8bea4 100644 --- a/ext/pcre/tests/bug79241.phpt +++ b/ext/pcre/tests/bug79241.phpt @@ -15,8 +15,19 @@ var_dump(preg_match($pattern, $text, $matches, 0, 0)); var_dump(preg_match($pattern, $text, $matches, 0, 1)); var_dump(preg_last_error() == PREG_BAD_UTF8_OFFSET_ERROR); +echo "\n"; + +$text = "VA\xff"; $text .= "LID"; +var_dump(preg_match($pattern, $text, $matches, 0, 4)); +var_dump(preg_match($pattern, $text, $matches, 0, 0)); +var_dump(preg_last_error() == PREG_BAD_UTF8_ERROR); + ?> --EXPECT-- int(0) bool(false) bool(true) + +int(1) +bool(false) +bool(true) -- cgit v1.2.1