diff options
-rw-r--r-- | ext/standard/html.c | 6 | ||||
-rw-r--r-- | ext/standard/html.h | 2 | ||||
-rwxr-xr-x | ext/standard/tests/strings/htmlentities-utf-2.phpt | 70 | ||||
-rwxr-xr-x | ext/standard/tests/strings/htmlentities-utf.phpt | 78 |
4 files changed, 135 insertions, 21 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index 7a2d9cd8c7..14b4fab941 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -485,6 +485,7 @@ struct basic_entities_dec { #define CHECK_LEN(pos, chars_need) \ if((str_len - (pos)) < chars_need) { \ + *newpos = pos; \ *status = FAILURE; \ return 0; \ } @@ -529,6 +530,7 @@ inline static unsigned short get_next_char(enum entity_charset charset, more = 0; if(stat) { /* we didn't finish the UTF sequence correctly */ + --pos; *status = FAILURE; } break; @@ -1135,6 +1137,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne if(status == FAILURE) { /* invalid MB sequence */ + if (quote_style & ENT_HTML_IGNORE_ERRORS) { + continue; + } efree(replaced); if(!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument"); @@ -1293,6 +1298,7 @@ void register_html_constants(INIT_FUNC_ARGS) REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS); + REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS); } /* }}} */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 76a27418aa..003496ca2e 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -24,10 +24,12 @@ #define ENT_HTML_QUOTE_NONE 0 #define ENT_HTML_QUOTE_SINGLE 1 #define ENT_HTML_QUOTE_DOUBLE 2 +#define ENT_HTML_IGNORE_ERRORS 4 #define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE #define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE) #define ENT_NOQUOTES ENT_HTML_QUOTE_NONE +#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS void register_html_constants(INIT_FUNC_ARGS); diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt new file mode 100755 index 0000000000..a80100cb10 --- /dev/null +++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt @@ -0,0 +1,70 @@ +--TEST-- +HTML entities with invalid chars and ENT_IGNORE +--INI-- +output_handler= +--FILE-- +<?php +@setlocale (LC_CTYPE, "C"); +$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE", + b"Voil\xE0", b"Clich\xE9s", + b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF", + b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD" + ); +foreach($strings as $string) { + $sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE, "utf-8"); + var_dump(bin2hex($sc_encoded)); + $ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8"); + var_dump(bin2hex($ent_encoded)); +} +?> +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "566f696c" +%unicode|string%(8) "566f696c" +%unicode|string%(12) "436c69636873" +%unicode|string%(12) "436c69636873" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(2) "41" +%unicode|string%(2) "41" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(2) "79" +%unicode|string%(2) "79" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(4) "4142" +%unicode|string%(4) "4142" +%unicode|string%(4) "4242" +%unicode|string%(4) "4242" +%unicode|string%(4) "4342" +%unicode|string%(4) "4342" +%unicode|string%(2) "44" +%unicode|string%(2) "44" +%unicode|string%(2) "45" +%unicode|string%(2) "45" +%unicode|string%(2) "46" +%unicode|string%(2) "46" diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt index 6b83afc778..b85803a163 100755 --- a/ext/standard/tests/strings/htmlentities-utf.phpt +++ b/ext/standard/tests/strings/htmlentities-utf.phpt @@ -4,8 +4,12 @@ HTML entities with invalid chars output_handler= --FILE-- <?php -setlocale (LC_CTYPE, "C"); -$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0", "\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE"); +@setlocale (LC_CTYPE, "C"); +$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE", + b"Voil\xE0", b"Clich\xE9s", + b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF", + b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD" + ); foreach($strings as $string) { $sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8"); var_dump(bin2hex($sc_encoded)); @@ -13,22 +17,54 @@ foreach($strings as $string) { var_dump(bin2hex($ent_encoded)); } ?> ---EXPECT-- -string(8) "266c743b" -string(8) "266c743b" -string(0) "" -string(0) "" -string(4) "d090" -string(4) "d090" -string(0) "" -string(0) "" -string(8) "d090d0b0" -string(8) "d090d0b0" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(8) "2667743b" -string(8) "2667743b"
\ No newline at end of file +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" |