diff options
author | Moriyoshi Koizumi <moriyoshi@php.net> | 2009-07-29 04:44:08 +0000 |
---|---|---|
committer | Moriyoshi Koizumi <moriyoshi@php.net> | 2009-07-29 04:44:08 +0000 |
commit | 7ce2bff51167c42b084f2ce5d2c00c0fd243dd68 (patch) | |
tree | 5b217f240b261c35b242d7b45d005b205a0f336d /ext/mbstring | |
parent | 9e445b043b58c646e046bf997f3ed0d8a71360de (diff) | |
download | php-git-7ce2bff51167c42b084f2ce5d2c00c0fd243dd68.tar.gz |
* Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- | ext/mbstring/libmbfl/filters/mbfilter_htmlent.c | 52 | ||||
-rw-r--r-- | ext/mbstring/tests/bug48645.phpt | 162 |
2 files changed, 208 insertions, 6 deletions
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c index 6c6654a1b9..0163520f5b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c @@ -186,18 +186,58 @@ int mbfl_filt_conv_html_dec(int c, mbfl_convert_filter *filter) } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* numeric entity */ - for (pos=2; pos<filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* numeric entity */ + for (pos=3; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; + } + } else { + ent = -1; + } + } else { + /* numeric entity */ + if (filter->status > 2) { + for (pos=2; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; + } + } else { + ent = -1; + } + } + if (ent >= 0 && ent < 0x110000) { + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); } - CK((*filter->output_function)(ent, filter->data)); filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; diff --git a/ext/mbstring/tests/bug48645.phpt 
b/ext/mbstring/tests/bug48645.phpt new file mode 100644 index 0000000000..6185442cc1 --- /dev/null +++ b/ext/mbstring/tests/bug48645.phpt @@ -0,0 +1,162 @@ +--TEST-- +Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php +var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES"))); +?> +--EXPECT-- +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623782f3b" +string(10) "2623783a3b" +string(10) "262378403b" +string(10) "262378603b" +string(10) "262378473b" +string(10) "262378673b" +string(8) "2623783b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623582f3b" +string(10) "2623583a3b" +string(10) "262358403b" +string(10) "262358603b" +string(10) "262358473b" +string(10) "262358673b" +string(8) "2623583b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(8) "26232f3b" +string(8) "26233a3b" +string(6) "26233b" +string(8) "f48fbfbf" +string(20) "2623783131303030303b" +string(8) "f48fbfbf" +string(20) "2623583131303030303b" +string(8) "f48fbfbf" +string(20) "2623313131343131323b" |