From c50cfc4d3d60e69040c9d2f3836b12e886b587ac Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Thu, 7 May 2020 15:46:08 +0200
Subject: Add quiet parameter to internal HTML entities API

In some places, we need to make sure that no warnings are thrown
due to unknown encoding. The error reporting code tried to avoid
this by determining a "safe charset", but this introduces subtle
discrepancies in which charset is picked (normally internal_encoding
takes precedence). Avoid this by suppressing the warning in the
first place.

While here, use the fallback logic to print error messages with
substitution characters more consistently, to avoid skipping
parts of the error message entirely.
---
 main/main.c | 49 ++++++++++++++++---------------------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

(limited to 'main/main.c')

diff --git a/main/main.c b/main/main.c
index dc9182e481..98aaa5bece 100644
--- a/main/main.c
+++ b/main/main.c
@@ -95,31 +95,6 @@ PHPAPI size_t core_globals_offset;
 
 #define SAFE_FILENAME(f) ((f)?(f):"-")
 
-static char *get_safe_charset_hint(void) {
-	ZEND_TLS char *lastHint = NULL;
-	ZEND_TLS char *lastCodeset = NULL;
-	char *hint = SG(default_charset);
-	size_t len = strlen(hint);
-	size_t i = 0;
-
-	if (lastHint == SG(default_charset)) {
-		return lastCodeset;
-	}
-
-	lastHint = hint;
-	lastCodeset = NULL;
-
-	for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
-		if (len == charset_map[i].codeset_len
-			&& zend_binary_strcasecmp(hint, len, charset_map[i].codeset, len) == 0) {
-			lastCodeset = (char*)charset_map[i].codeset;
-			break;
-		}
-	}
-
-	return lastCodeset;
-}
-
 /* {{{ PHP_INI_MH
  */
 static PHP_INI_MH(OnSetFacility)
@@ -937,6 +912,19 @@ PHPAPI size_t php_printf(const char *format, ...)
 }
 /* }}} */
 
+static zend_string *escape_html(const char *buffer, size_t buffer_len) {
+	zend_string *result = php_escape_html_entities_ex(
+		(const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT,
+		/* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
+	if (!result || ZSTR_LEN(result) == 0) {
+		/* Retry with substituting invalid chars on fail. */
+		result = php_escape_html_entities_ex(
+			(const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS,
+			/* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
+	}
+	return result;
+}
+
 /* {{{ php_verror */
 /* php_verror is called from php_error_docref functions.
  * Its purpose is to unify error messages and automatically generate clickable
@@ -962,12 +950,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
 	buffer_len = (int)vspprintf(&buffer, 0, format, args);
 
 	if (PG(html_errors)) {
-		replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
-		/* Retry with substituting invalid chars on fail. */
-		if (!replace_buffer || ZSTR_LEN(replace_buffer) < 1) {
-			replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS, get_safe_charset_hint());
-		}
-
+		replace_buffer = escape_html(buffer, buffer_len);
 		efree(buffer);
 
 		if (replace_buffer) {
@@ -1032,7 +1015,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
 	}
 
 	if (PG(html_errors)) {
-		replace_origin = php_escape_html_entities((unsigned char*)origin, origin_len, 0, ENT_COMPAT, get_safe_charset_hint());
+		replace_origin = escape_html(origin, origin_len);
 		efree(origin);
 		origin = ZSTR_VAL(replace_origin);
 	}
@@ -1335,7 +1318,7 @@ static ZEND_COLD void php_error_cb(int orig_type, const char *error_filename, co
 
 				if (PG(html_errors)) {
 					if (type == E_ERROR || type == E_PARSE) {
-						zend_string *buf = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
+						zend_string *buf = escape_html(buffer, buffer_len);
 						php_printf("%s<br />\n<b>%s</b>:  %s in <b>%s</b> on line <b>%" PRIu32 "</b><br />\n%s", STR_PRINT(prepend_string), error_type_str, ZSTR_VAL(buf), error_filename, error_lineno, STR_PRINT(append_string));
 						zend_string_free(buf);
 					} else {
-- 
cgit v1.2.1