summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Arnaud Le Blanc <lbarnaud@php.net> 2008-11-26 03:00:06 +0000
committer Arnaud Le Blanc <lbarnaud@php.net> 2008-11-26 03:00:06 +0000
commit 18794addbdd5f0463e9ac27396115716243ef930 (patch)
tree 25c22db1ee08a2dfabdb34e4fa46626ef4b5c791
parent a05edaf2bd92d9b5edeeb8ed737d9b371ab3e7d6 (diff)
download php-git-18794addbdd5f0463e9ac27396115716243ef930.tar.gz
MFH: Added ENT_IGNORE as a compatibility flag for htmlentities() and
htmlspecialchars() to skip multibyte sequences instead of returning an empty string (as iconv's //IGNORE). These functions will still never return an invalid or incomplete multibyte sequence. Fixes #43896
-rw-r--r-- ext/standard/html.c 6
-rw-r--r-- ext/standard/html.h 2
-rwxr-xr-x ext/standard/tests/strings/htmlentities-utf-2.phpt 70
-rwxr-xr-x ext/standard/tests/strings/htmlentities-utf.phpt 78
4 files changed, 135 insertions, 21 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 7a2d9cd8c7..14b4fab941 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -485,6 +485,7 @@ struct basic_entities_dec {
#define CHECK_LEN(pos, chars_need) \
if((str_len - (pos)) < chars_need) { \
+ *newpos = pos; \
*status = FAILURE; \
return 0; \
}
@@ -529,6 +530,7 @@ inline static unsigned short get_next_char(enum entity_charset charset,
more = 0;
if(stat) {
/* we didn't finish the UTF sequence correctly */
+ --pos;
*status = FAILURE;
}
break;
@@ -1135,6 +1137,9 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, int oldlen, int *ne
if(status == FAILURE) {
/* invalid MB sequence */
+ if (quote_style & ENT_HTML_IGNORE_ERRORS) {
+ continue;
+ }
efree(replaced);
if(!PG(display_errors)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument");
@@ -1293,6 +1298,7 @@ void register_html_constants(INIT_FUNC_ARGS)
REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
+ REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS);
}
/* }}} */
diff --git a/ext/standard/html.h b/ext/standard/html.h
index 76a27418aa..003496ca2e 100644
--- a/ext/standard/html.h
+++ b/ext/standard/html.h
@@ -24,10 +24,12 @@
#define ENT_HTML_QUOTE_NONE 0
#define ENT_HTML_QUOTE_SINGLE 1
#define ENT_HTML_QUOTE_DOUBLE 2
+#define ENT_HTML_IGNORE_ERRORS 4
#define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE
#define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)
#define ENT_NOQUOTES ENT_HTML_QUOTE_NONE
+#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS
void register_html_constants(INIT_FUNC_ARGS);
diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt
new file mode 100755
index 0000000000..a80100cb10
--- /dev/null
+++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt
@@ -0,0 +1,70 @@
+--TEST--
+HTML entities with invalid chars and ENT_IGNORE
+--INI--
+output_handler=
+--FILE--
+<?php
+@setlocale (LC_CTYPE, "C");
+$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE",
+ b"Voil\xE0", b"Clich\xE9s",
+ b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
+ b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
+ );
+foreach($strings as $string) {
+ $sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
+ var_dump(bin2hex($sc_encoded));
+ $ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
+ var_dump(bin2hex($ent_encoded));
+}
+?>
+--EXPECTF--
+%unicode|string%(8) "266c743b"
+%unicode|string%(8) "266c743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "41"
+%unicode|string%(2) "41"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "79"
+%unicode|string%(2) "79"
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "566f696c"
+%unicode|string%(8) "566f696c"
+%unicode|string%(12) "436c69636873"
+%unicode|string%(12) "436c69636873"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(2) "41"
+%unicode|string%(2) "41"
+%unicode|string%(4) "c3a9"
+%unicode|string%(16) "266561637574653b"
+%unicode|string%(2) "79"
+%unicode|string%(2) "79"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(4) "4142"
+%unicode|string%(4) "4142"
+%unicode|string%(4) "4242"
+%unicode|string%(4) "4242"
+%unicode|string%(4) "4342"
+%unicode|string%(4) "4342"
+%unicode|string%(2) "44"
+%unicode|string%(2) "44"
+%unicode|string%(2) "45"
+%unicode|string%(2) "45"
+%unicode|string%(2) "46"
+%unicode|string%(2) "46"
diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt
index 6b83afc778..b85803a163 100755
--- a/ext/standard/tests/strings/htmlentities-utf.phpt
+++ b/ext/standard/tests/strings/htmlentities-utf.phpt
@@ -4,8 +4,12 @@ HTML entities with invalid chars
output_handler=
--FILE--
<?php
-setlocale (LC_CTYPE, "C");
-$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0", "\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE");
+@setlocale (LC_CTYPE, "C");
+$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE",
+ b"Voil\xE0", b"Clich\xE9s",
+ b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
+ b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
+ );
foreach($strings as $string) {
$sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8");
var_dump(bin2hex($sc_encoded));
@@ -13,22 +17,54 @@ foreach($strings as $string) {
var_dump(bin2hex($ent_encoded));
}
?>
---EXPECT--
-string(8) "266c743b"
-string(8) "266c743b"
-string(0) ""
-string(0) ""
-string(4) "d090"
-string(4) "d090"
-string(0) ""
-string(0) ""
-string(8) "d090d0b0"
-string(8) "d090d0b0"
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(0) ""
-string(8) "2667743b"
-string(8) "2667743b"
\ No newline at end of file
+--EXPECTF--
+%unicode|string%(8) "266c743b"
+%unicode|string%(8) "266c743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "c3a9"
+%unicode|string%(16) "266561637574653b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""