summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Moriyoshi Koizumi <moriyoshi@php.net> 2009-12-07 15:41:43 +0000
committer Moriyoshi Koizumi <moriyoshi@php.net> 2009-12-07 15:41:43 +0000
commit 428088c848e75f38f3d03cecf9aca04d643c29dc (patch)
tree ac7fbc4dc76315ddf9325fc63f9b90d3eaf9e8d2
parent 9fca678c4737f3ca850008d65b4f79e7be7c7c77 (diff)
download php-git-428088c848e75f38f3d03cecf9aca04d643c29dc.tar.gz
- Take account of surrogate pairs.
-rw-r--r-- ext/standard/html.c 2
-rw-r--r-- ext/standard/tests/strings/bug49785.phpt 10
2 files changed, 11 insertions, 1 deletion
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 14b4f2caba..2cc1cc287b 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -566,6 +566,8 @@ inline static unsigned int get_next_char(enum entity_charset charset,
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
if (this_char < 0x800) {
MB_FAILURE(pos);
+ } else if (this_char >= 0xd800 && this_char <= 0xdfff) {
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
diff --git a/ext/standard/tests/strings/bug49785.phpt b/ext/standard/tests/strings/bug49785.phpt
index f344855931..3f60e9fd37 100644
--- a/ext/standard/tests/strings/bug49785.phpt
+++ b/ext/standard/tests/strings/bug49785.phpt
@@ -36,10 +36,14 @@ var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
echo "--\n";
-// UTF-8: alternative (invalid) UTF-8 sequence
+// UTF-8: alternative (invalid) UTF-8 sequence / surrogate pairs
var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8')));
var_dump(_bin2hex(htmlspecialchars("\xf0\x80\x80\xa6", ENT_QUOTES, 'UTF-8')));
+var_dump(_bin2hex(htmlspecialchars("\xec\xbf\xbf", ENT_QUOTES, 'UTF-8')));
+var_dump(_bin2hex(htmlspecialchars("\xed\xa0\x80", ENT_QUOTES, 'UTF-8')));
+var_dump(_bin2hex(htmlspecialchars("\xed\xbf\xbf", ENT_QUOTES, 'UTF-8')));
+var_dump(_bin2hex(htmlspecialchars("\xee\x80\x80", ENT_QUOTES, 'UTF-8')));
// Shift_JIS: non-lead byte >= 0x80
var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'Shift_JIS')));
@@ -158,6 +162,10 @@ string(0) ""
string(0) ""
string(0) ""
string(0) ""
+string(6) "ecbfbf"
+string(0) ""
+string(0) ""
+string(6) "ee8080"
string(2) "80"
string(2) "a0"
string(2) "a1"