summary | refs | log | tree | commit | diff
path: root/ext/mbstring
diff options
context:
space:
mode:
author: Moriyoshi Koizumi <moriyoshi@php.net> 2009-07-29 04:44:08 +0000
committer: Moriyoshi Koizumi <moriyoshi@php.net> 2009-07-29 04:44:08 +0000
commit: 7ce2bff51167c42b084f2ce5d2c00c0fd243dd68 (patch)
tree: 5b217f240b261c35b242d7b45d005b205a0f336d /ext/mbstring
parent: 9e445b043b58c646e046bf997f3ed0d8a71360de (diff)
download: php-git-7ce2bff51167c42b084f2ce5d2c00c0fd243dd68.tar.gz
* Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 52
-rw-r--r-- ext/mbstring/tests/bug48645.phpt 162
2 files changed, 208 insertions, 6 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
index 6c6654a1b9..0163520f5b 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
@@ -186,18 +186,58 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter)
}
} else {
if (c == ';') {
- buffer[filter->status] = 0;
if (buffer[1]=='#') {
- /* numeric entity */
- for (pos=2; pos<filter->status; pos++) {
- ent = ent*10 + (buffer[pos] - '0');
+ if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+ if (filter->status > 3) {
+ /* numeric entity */
+ for (pos=3; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else if (v >= 'A' && v <= 'F') {
+ v = v - 'A' + 10;
+ } else if (v >= 'a' && v <= 'f') {
+ v = v - 'a' + 10;
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent * 16 + v;
+ }
+ } else {
+ ent = -1;
+ }
+ } else {
+ /* numeric entity */
+ if (filter->status > 2) {
+ for (pos=2; pos<filter->status; pos++) {
+ int v = buffer[pos];
+ if (v >= '0' && v <= '9') {
+ v = v - '0';
+ } else {
+ ent = -1;
+ break;
+ }
+ ent = ent*10 + v;
+ }
+ } else {
+ ent = -1;
+ }
+ }
+ if (ent >= 0 && ent < 0x110000) {
+ CK((*filter->output_function)(ent, filter->data));
+ } else {
+ for (pos = 0; pos < filter->status; pos++) {
+ CK((*filter->output_function)(buffer[pos], filter->data));
+ }
+ CK((*filter->output_function)(c, filter->data));
}
- CK((*filter->output_function)(ent, filter->data));
filter->status = 0;
/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
} else {
/* named entity */
- entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+ buffer[filter->status] = 0;
+ entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
while (entity->name) {
if (!strcmp(buffer+1, entity->name)) {
ent = entity->code;
diff --git a/ext/mbstring/tests/bug48645.phpt b/ext/mbstring/tests/bug48645.phpt
new file mode 100644
index 0000000000..6185442cc1
--- /dev/null
+++ b/ext/mbstring/tests/bug48645.phpt
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"