From 50eca61f68815005f3b0f808578cc1ce3b4297f0 Mon Sep 17 00:00:00 2001 From: Craig Francis Date: Wed, 6 Jan 2021 17:44:11 +0000 Subject: Use ENT_QUOTES|ENT_SUBSTITUTE default for HTML encoding and decoding functions htmlspecialchars() etc now use ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. Closes GH-6583. --- UPGRADING | 6 ++++++ ext/standard/basic_functions.stub.php | 10 +++++----- ext/standard/basic_functions_arginfo.h | 8 ++++---- ext/standard/html.c | 8 ++++---- ext/standard/tests/strings/bug53021.phpt | 2 +- ext/standard/tests/strings/bug61116.phpt | 4 ++-- ext/standard/tests/strings/html_entity_decode3.phpt | 2 +- ext/standard/tests/strings/htmlentities24.phpt | 2 +- ext/standard/tests/strings/htmlspecialchars.phpt | 2 +- ext/standard/tests/strings/htmlspecialchars_basic.phpt | 2 +- ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt | 4 ++-- .../tests/strings/htmlspecialchars_decode_variation3.phpt | 8 ++++---- .../tests/strings/htmlspecialchars_decode_variation4.phpt | 8 ++++---- .../tests/strings/htmlspecialchars_decode_variation5.phpt | 10 +++++----- 14 files changed, 41 insertions(+), 35 deletions(-) diff --git a/UPGRADING b/UPGRADING index 1b2fafcbc7..3c0939796e 100644 --- a/UPGRADING +++ b/UPGRADING @@ -69,6 +69,12 @@ PHP 8.1 UPGRADE NOTES - Standard: . version_compare() no longer accepts undocumented operator abbreviations. + . htmlspecialchars(), htmlentities(), htmlspecialchars_decode(), + html_entity_decode() and get_html_translation_table() now use + ENT_QUOTES | ENT_SUBSTITUTE rather than ENT_COMPAT by default. This means + that ' is escaped to &#039; while previously it was left alone. + Additionally, malformed UTF-8 will be replaced by a Unicode substitution + character, instead of resulting in an empty string. ======================================== 2. 
New Features diff --git a/ext/standard/basic_functions.stub.php b/ext/standard/basic_functions.stub.php index 7776e7765d..b6597d262e 100755 --- a/ext/standard/basic_functions.stub.php +++ b/ext/standard/basic_functions.stub.php @@ -512,15 +512,15 @@ function headers_list(): array {} /* {{{ html.c */ -function htmlspecialchars(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {} +function htmlspecialchars(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {} -function htmlspecialchars_decode(string $string, int $flags = ENT_COMPAT): string {} +function htmlspecialchars_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE): string {} -function html_entity_decode(string $string, int $flags = ENT_COMPAT, ?string $encoding = null): string {} +function html_entity_decode(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null): string {} -function htmlentities(string $string, int $flags = ENT_COMPAT, ?string $encoding = null, bool $double_encode = true): string {} +function htmlentities(string $string, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, ?string $encoding = null, bool $double_encode = true): string {} -function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_COMPAT, string $encoding = "UTF-8"): array {} +function get_html_translation_table(int $table = HTML_SPECIALCHARS, int $flags = ENT_QUOTES | ENT_SUBSTITUTE, string $encoding = "UTF-8"): array {} /* }}} */ diff --git a/ext/standard/basic_functions_arginfo.h b/ext/standard/basic_functions_arginfo.h index d19184fbe6..1b72949a54 100644 --- a/ext/standard/basic_functions_arginfo.h +++ b/ext/standard/basic_functions_arginfo.h @@ -765,19 +765,19 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars, 0, 1, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) - 
ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, double_encode, _IS_BOOL, 0, "true") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_htmlspecialchars_decode, 0, 1, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_html_entity_decode, 0, 1, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 1, "null") ZEND_END_ARG_INFO() @@ -785,7 +785,7 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_get_html_translation_table, 0, 0, IS_ARRAY, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, table, IS_LONG, 0, "HTML_SPECIALCHARS") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_COMPAT") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "ENT_QUOTES | ENT_SUBSTITUTE") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, encoding, IS_STRING, 0, "\"UTF-8\"") ZEND_END_ARG_INFO() diff --git a/ext/standard/html.c b/ext/standard/html.c index a13f02dc06..b5e2cc4e66 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -1316,7 +1316,7 @@ encode_amp: static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) { zend_string *str, *hint_charset = NULL; - zend_long flags = ENT_COMPAT; + zend_long flags = ENT_QUOTES|ENT_SUBSTITUTE; zend_string *replaced; bool double_encode = 1; @@ -1367,7 +1367,7 @@ 
PHP_FUNCTION(htmlspecialchars) PHP_FUNCTION(htmlspecialchars_decode) { zend_string *str; - zend_long quote_style = ENT_COMPAT; + zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE; zend_string *replaced; ZEND_PARSE_PARAMETERS_START(1, 2) @@ -1385,7 +1385,7 @@ PHP_FUNCTION(htmlspecialchars_decode) PHP_FUNCTION(html_entity_decode) { zend_string *str, *hint_charset = NULL; - zend_long quote_style = ENT_COMPAT; + zend_long quote_style = ENT_QUOTES|ENT_SUBSTITUTE; zend_string *replaced; ZEND_PARSE_PARAMETERS_START(1, 3) @@ -1468,7 +1468,7 @@ static inline void write_s3row_data( PHP_FUNCTION(get_html_translation_table) { zend_long all = HTML_SPECIALCHARS, - flags = ENT_COMPAT; + flags = ENT_QUOTES|ENT_SUBSTITUTE; int doctype; entity_table_opt entity_table; const enc_to_uni *to_uni_table = NULL; diff --git a/ext/standard/tests/strings/bug53021.phpt b/ext/standard/tests/strings/bug53021.phpt index cdb408db63..4f70f213e8 100644 --- a/ext/standard/tests/strings/bug53021.phpt +++ b/ext/standard/tests/strings/bug53021.phpt @@ -38,4 +38,4 @@ single quotes variations: ' ' ' -' +' diff --git a/ext/standard/tests/strings/bug61116.phpt b/ext/standard/tests/strings/bug61116.phpt index f8f76cd504..01da21d975 100644 --- a/ext/standard/tests/strings/bug61116.phpt +++ b/ext/standard/tests/strings/bug61116.phpt @@ -10,7 +10,7 @@ Function [ function htmlspecialchars ] { - Parameters [4] { Parameter #0 [ string $string ] - Parameter #1 [ int $flags = ENT_COMPAT ] + Parameter #1 [ int $flags = ENT_QUOTES | ENT_SUBSTITUTE ] Parameter #2 [ ?string $encoding = null ] Parameter #3 [ bool $double_encode = true ] } @@ -21,7 +21,7 @@ Function [ function get_html_translation_table ] { - Parameters [3] { Parameter #0 [ int $table = HTML_SPECIALCHARS ] - Parameter #1 [ int $flags = ENT_COMPAT ] + Parameter #1 [ int $flags = ENT_QUOTES | ENT_SUBSTITUTE ] Parameter #2 [ string $encoding = "UTF-8" ] } - Return [ array ] diff --git a/ext/standard/tests/strings/html_entity_decode3.phpt 
b/ext/standard/tests/strings/html_entity_decode3.phpt index e8372ed4f8..6b5efd8f04 100644 --- a/ext/standard/tests/strings/html_entity_decode3.phpt +++ b/ext/standard/tests/strings/html_entity_decode3.phpt @@ -218,7 +218,7 @@ echo "\nDone.\n";  NOT DECODED  NOT DECODED DECODED -' NOT DECODED +' DECODED  NOT DECODED € NOT DECODED Ÿ NOT DECODED diff --git a/ext/standard/tests/strings/htmlentities24.phpt b/ext/standard/tests/strings/htmlentities24.phpt index 2a04bd345e..9ba5468040 100644 --- a/ext/standard/tests/strings/htmlentities24.phpt +++ b/ext/standard/tests/strings/htmlentities24.phpt @@ -310,7 +310,7 @@ string(198) "‚†™Ÿ€‚†„€&perm string(42) "<html> This is a test! </html>" *** Testing htmlentites() on a quote *** -string(36) "A 'quote' is <b>bold</b>" +string(46) "A 'quote' is <b>bold</b>" string(46) "A 'quote' is <b>bold</b>" string(36) "A 'quote' is <b>bold</b>" string(36) "A 'quote' is <b>bold</b>" diff --git a/ext/standard/tests/strings/htmlspecialchars.phpt b/ext/standard/tests/strings/htmlspecialchars.phpt index 095225f534..5844fdadb0 100644 --- a/ext/standard/tests/strings/htmlspecialchars.phpt +++ b/ext/standard/tests/strings/htmlspecialchars.phpt @@ -306,7 +306,7 @@ string(187) "<br>Testing<p>New file.</p><p><br>Fil string(46) "<br>Testing<p>New file.</p> " *** Testing htmlspecialchars() on a quote... 
-string(36) "A 'quote' is <b>bold</b>" +string(46) "A 'quote' is <b>bold</b>" string(46) "A 'quote' is <b>bold</b>" string(36) "A 'quote' is <b>bold</b>" string(36) "A 'quote' is <b>bold</b>" diff --git a/ext/standard/tests/strings/htmlspecialchars_basic.phpt b/ext/standard/tests/strings/htmlspecialchars_basic.phpt index 578814beff..c86692aa3b 100644 --- a/ext/standard/tests/strings/htmlspecialchars_basic.phpt +++ b/ext/standard/tests/strings/htmlspecialchars_basic.phpt @@ -56,7 +56,7 @@ Basic tests Test 1: abc<>"& Test 2: &&abc<>"& Test 3: a>,\<bc<>"& -Test 4: a\'\'&bc<>"& +Test 4: a\'\'&bc<>"& Test 5: &amp;&lt; Test 6: abc<>"& Test 7: &&abc<>"& diff --git a/ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt b/ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt index d3e3166fb9..ad9df68f05 100644 --- a/ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt +++ b/ext/standard/tests/strings/htmlspecialchars_decode_basic.phpt @@ -26,8 +26,8 @@ echo "Done"; ?> --EXPECT-- *** Testing htmlspecialchars_decode() : basic functionality *** -string(92) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" -string(92) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" +string(82) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" +string(82) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" string(92) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" string(92) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. " double quoted string "" string(102) "Roy's height > Sam's height. 13 < 25. 1111 & 0000 = 0000. 
" double quoted string "" diff --git a/ext/standard/tests/strings/htmlspecialchars_decode_variation3.phpt b/ext/standard/tests/strings/htmlspecialchars_decode_variation3.phpt index 93058d1853..98d4684d63 100644 --- a/ext/standard/tests/strings/htmlspecialchars_decode_variation3.phpt +++ b/ext/standard/tests/strings/htmlspecialchars_decode_variation3.phpt @@ -73,12 +73,12 @@ string(0) "" -- Iteration 2 -- string(0) "" -- Iteration 3 -- -string(103) "Roy's height > Sam's height +string(93) "Roy's height > Sam's height 13 < 25 1111 & 0000 = 0000 "This is a double quoted string"" -- Iteration 4 -- -string(130) "Roy's height > Sam 's height +string(120) "Roy's height > Sam 's height 1111 & 0000 = 0000 " heredoc double quoted string. with different white spaces"" @@ -87,8 +87,8 @@ string(62) "11 < 12. 123 string 4567 "string" 1111 & 0000 = 0000 ;" -- Iteration 6 -- -string(153) "< This's a string with quotes: +string(143) "< This's a string with quotes: "strings in double quote" & 'strings in single quote' " -this\line is 'single quoted' /with\slashes " +this\line is 'single quoted' /with\slashes " Done diff --git a/ext/standard/tests/strings/htmlspecialchars_decode_variation4.phpt b/ext/standard/tests/strings/htmlspecialchars_decode_variation4.phpt index bdbddcf8a9..00d4e9a143 100644 --- a/ext/standard/tests/strings/htmlspecialchars_decode_variation4.phpt +++ b/ext/standard/tests/strings/htmlspecialchars_decode_variation4.phpt @@ -33,22 +33,22 @@ echo "Done"; --EXPECT-- *** Testing htmlspecialchars_decode() : usage variations *** -- Iteration 1 -- -string(90) "Roy's height > Sam's \$height... 1111 ≈ 0000 = 0000... " double quote string "" +string(85) "Roy's height > Sam's \$height... 1111 ≈ 0000 = 0000... " double quote string "" string(90) "Roy's height > Sam's \$height... 1111 ≈ 0000 = 0000... " double quote string "" string(100) "Roy's height > Sam's \$height... 1111 ≈ 0000 = 0000... " double quote string "" string(85) "Roy's height > Sam's \$height... 
1111 ≈ 0000 = 0000... " double quote string "" -- Iteration 2 -- -string(88) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string "" +string(78) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string "" string(88) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string "" string(98) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string "" string(78) "Roy's height > Sam's height... \t\t 13 < 15...\n\r " double quote\f\v string "" -- Iteration 3 -- -string(48) "\nRoy's height >\t; Sam's\v height\f" +string(38) "\nRoy's height >\t; Sam's\v height\f" string(48) "\nRoy's height >\t; Sam's\v height\f" string(48) "\nRoy's height >\t; Sam's\v height\f" string(38) "\nRoy's height >\t; Sam's\v height\f" -- Iteration 4 -- -string(48) "\r\tRoy's height >\r; Sam\t's height" +string(38) "\r\tRoy's height >\r; Sam\t's height" string(48) "\r\tRoy's height >\r; Sam\t's height" string(48) "\r\tRoy's height >\r; Sam\t's height" string(38) "\r\tRoy's height >\r; Sam\t's height" diff --git a/ext/standard/tests/strings/htmlspecialchars_decode_variation5.phpt b/ext/standard/tests/strings/htmlspecialchars_decode_variation5.phpt index 9937ca9b54..3f235944eb 100644 --- a/ext/standard/tests/strings/htmlspecialchars_decode_variation5.phpt +++ b/ext/standard/tests/strings/htmlspecialchars_decode_variation5.phpt @@ -32,12 +32,12 @@ echo "Done"; --EXPECT-- *** Testing htmlspecialchars_decode() : usage variations *** -- Iteration 1 -- -string(89) "Roy's height > Sam's $height... 1111 ≈ 0000 = 0000... " double quote string "" +string(84) "Roy's height > Sam's $height... 1111 ≈ 0000 = 0000... " double quote string "" string(89) "Roy's height > Sam's $height... 1111 ≈ 0000 = 0000... " double quote string "" string(99) "Roy's height > Sam's $height... 1111 ≈ 0000 = 0000... " double quote string "" string(84) "Roy's height > Sam's $height... 1111 ≈ 0000 = 0000... 
" double quote string "" -- Iteration 2 -- -string(82) "Roy's height > Sam's height... 13 < 15... +string(72) "Roy's height > Sam's height... 13 < 15... " double quote string "" string(82) "Roy's height > Sam's height... 13 < 15... " double quote string "" @@ -46,8 +46,8 @@ string(92) "Roy's height > Sam's height... 13 < 15... string(72) "Roy's height > Sam's height... 13 < 15... " double quote string "" -- Iteration 3 -- -string(44) " -Roy's height > ; Sam's height " +string(34) " +Roy's height > ; Sam's height " string(44) " Roy's height > ; Sam's height " string(44) " @@ -55,7 +55,7 @@ Roy's height > ; Sam's height " string(34) " Roy's height > ; Sam's height " -- Iteration 4 -- -string(44) " Roy's height > ; Sam 's height" +string(34) " Roy's height > ; Sam 's height" string(44) " Roy's height > ; Sam 's height" string(44) " Roy's height > ; Sam 's height" string(34) " Roy's height > ; Sam 's height" -- cgit v1.2.1