From cd5591a28d738b1b00c96c0e6cae91b490dba56d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 7 Feb 2020 17:01:39 +0100 Subject: PCRE: Only remember valid UTF-8 if start offset zero PCRE only validates the string starting from the start offset (minus maximum look-behind, but let's ignore that), so we can only remember that the string is fully valid UTF-8 if the original start offset is zero. --- ext/pcre/php_pcre.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'ext/pcre/php_pcre.c') diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 104b8d4c97..c50bd2fba2 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -1167,7 +1167,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, PCRE2_SPTR mark = NULL; /* Target for MARK name */ zval marks; /* Array of marks for PREG_PATTERN_ORDER */ pcre2_match_data *match_data; - PCRE2_SIZE start_offset2; + PCRE2_SIZE start_offset2, orig_start_offset; char *subject = ZSTR_VAL(subject_str); size_t subject_len = ZSTR_LEN(subject_str); @@ -1263,8 +1263,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, } } - options = (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, start_offset2) - ? 0 : PCRE2_NO_UTF_CHECK; + orig_start_offset = start_offset2; + options = + (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset) + ? 0 : PCRE2_NO_UTF_CHECK; /* Execute the regular expression. */ #ifdef HAVE_PCRE_JIT_SUPPORT @@ -1454,7 +1456,8 @@ error: if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */ - if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) { + if ((pce->compile_options & PCRE2_UTF) + && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) { GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8); } -- cgit v1.2.1