summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Moriyoshi Koizumi <moriyoshi@php.net> 2009-10-11 23:52:33 +0000
committer: Moriyoshi Koizumi <moriyoshi@php.net> 2009-10-11 23:52:33 +0000
commit: 1835a63dfddcf1a82d677ee2cf1db2d8a34d1a7f (patch)
tree: f6514322d8a2c45f8ab823f632d32add639adb49
parent: 112379ba243e788aaccc030406455c7ce3cbd9d7 (diff)
download: php-git-1835a63dfddcf1a82d677ee2cf1db2d8a34d1a7f.tar.gz
- A couple more fixes for my previous fix.
(One of the fixes is by Arnaud Le Blanc. Thanks!)
-rw-r--r-- ext/standard/html.c 68
-rw-r--r-- ext/standard/tests/strings/bug49785.phpt 54
-rwxr-xr-x ext/standard/tests/strings/htmlentities-utf-2.phpt 12
-rwxr-xr-x ext/standard/tests/strings/htmlentities-utf.phpt 12
4 files changed, 99 insertions, 47 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c
index 8845a8aa8c..7932194cb8 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -483,11 +483,26 @@ struct basic_entities_dec {
} \
mbseq[mbpos++] = (mbchar); }
+/* skip one byte and return */
+#define MB_FAILURE(pos) do { \
+ *newpos = pos + 1; \
+ *status = FAILURE; \
+ return 0; \
+} while (0)
+
#define CHECK_LEN(pos, chars_need) \
- if((str_len - (pos)) < chars_need) { \
- *newpos = pos; \
- *status = FAILURE; \
- return 0; \
+ if (chars_need < 1) { \
+ if((str_len - (pos)) < chars_need) { \
+ *newpos = pos; \
+ *status = FAILURE; \
+ return 0; \
+ } \
+ } else { \
+ if((str_len - (pos)) < chars_need) { \
+ *newpos = pos + 1; \
+ *status = FAILURE; \
+ return 0; \
+ } \
}
/* {{{ get_next_char
@@ -526,10 +541,12 @@ inline static unsigned int get_next_char(enum entity_charset charset,
this_char = c;
pos++;
} else if (c < 0xc0) {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
} else if (c < 0xe0) {
CHECK_LEN(pos, 2);
+ if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+ MB_FAILURE(pos);
+ }
this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
if (this_char < 0x80) {
*status = FAILURE;
@@ -540,10 +557,15 @@ inline static unsigned int get_next_char(enum entity_charset charset,
pos += 2;
} else if (c < 0xf0) {
CHECK_LEN(pos, 3);
+ if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+ MB_FAILURE(pos);
+ }
+ if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+ MB_FAILURE(pos);
+ }
this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
if (this_char < 0x800) {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
@@ -551,10 +573,18 @@ inline static unsigned int get_next_char(enum entity_charset charset,
pos += 3;
} else if (c < 0xf8) {
CHECK_LEN(pos, 4);
+ if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+ MB_FAILURE(pos);
+ }
+ if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+ MB_FAILURE(pos);
+ }
+ if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+ MB_FAILURE(pos);
+ }
this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
if (this_char < 0x10000) {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
MB_WRITE((unsigned char)c);
MB_WRITE((unsigned char)str[pos + 1]);
@@ -562,8 +592,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE((unsigned char)str[pos + 3]);
pos += 4;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
}
break;
@@ -585,8 +614,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE(next_char);
this_char = (this_char << 8) | next_char;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
} else {
MB_WRITE(this_char);
@@ -611,8 +639,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE(next_char);
this_char = (this_char << 8) | next_char;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
} else {
MB_WRITE(this_char);
@@ -634,8 +661,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE(next_char);
this_char = (this_char << 8) | next_char;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
} else if (this_char == 0x8e) {
/* peek at the next char */
@@ -647,8 +673,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE(next_char);
this_char = (this_char << 8) | next_char;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
} else if (this_char == 0x8f) {
/* peek at the next two char */
@@ -665,8 +690,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
MB_WRITE(next2_char);
this_char = (this_char << 16) | (next_char << 8) | next_char;
} else {
- *status = FAILURE;
- return 0;
+ MB_FAILURE(pos);
}
} else {
MB_WRITE(this_char);
diff --git a/ext/standard/tests/strings/bug49785.phpt b/ext/standard/tests/strings/bug49785.phpt
index eb4ad8b743..f344855931 100644
--- a/ext/standard/tests/strings/bug49785.phpt
+++ b/ext/standard/tests/strings/bug49785.phpt
@@ -7,19 +7,33 @@ function _bin2hex($val) {
}
// UTF-8: basic tests
-var_dump(bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
echo "--\n";
// UTF-8: alternative (invalid) UTF-8 sequence
@@ -115,17 +129,31 @@ foreach (array_map('chr', range(0x81, 0xfe)) as $c) {
--EXPECT--
string(0) ""
string(4) "c280"
+string(0) ""
+string(0) ""
string(14) "26416c7068613b"
string(14) "26616c7068613b"
string(4) "dfbf"
string(6) "e0a080"
string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
string(16) "266865617274733b"
string(6) "efbfbf"
string(0) ""
+string(0) ""
+string(0) ""
string(8) "f0908080"
string(8) "f7bfbfbf"
string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
--
string(0) ""
string(0) ""
diff --git a/ext/standard/tests/strings/htmlentities-utf-2.phpt b/ext/standard/tests/strings/htmlentities-utf-2.phpt
index a80100cb10..c5f4ac4ea6 100755
--- a/ext/standard/tests/strings/htmlentities-utf-2.phpt
+++ b/ext/standard/tests/strings/htmlentities-utf-2.phpt
@@ -36,8 +36,8 @@ foreach($strings as $string) {
%unicode|string%(0) ""
%unicode|string%(2) "79"
%unicode|string%(2) "79"
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
%unicode|string%(8) "566f696c"
%unicode|string%(8) "566f696c"
%unicode|string%(12) "436c69636873"
@@ -52,10 +52,10 @@ foreach($strings as $string) {
%unicode|string%(2) "79"
%unicode|string%(8) "f7bfbfbf"
%unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
%unicode|string%(4) "4142"
%unicode|string%(4) "4142"
%unicode|string%(4) "4242"
diff --git a/ext/standard/tests/strings/htmlentities-utf.phpt b/ext/standard/tests/strings/htmlentities-utf.phpt
index b85803a163..1daafc61d8 100755
--- a/ext/standard/tests/strings/htmlentities-utf.phpt
+++ b/ext/standard/tests/strings/htmlentities-utf.phpt
@@ -36,8 +36,8 @@ foreach($strings as $string) {
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(0) ""
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(0) ""
@@ -52,10 +52,10 @@ foreach($strings as $string) {
%unicode|string%(0) ""
%unicode|string%(8) "f7bfbfbf"
%unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(0) ""