diff options
| Field | Value | Date |
|---|---|---|
| author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-27 18:13:25 +0000 |
| committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-27 18:13:25 +0000 |
| commit | e69b1ff2c43fbfd672097e9f8b25dead81f34ccf (patch) | |
| tree | 50dd9955cf078bbeefd120d86d09aec99c9db7cd | |
| parent | da400e7500e236a332b8104b373e60e842bbd63e (diff) | |
| download | php-git-e69b1ff2c43fbfd672097e9f8b25dead81f34ccf.tar.gz | |
- Fixed bug #49687 (utf8_decode vulnerabilities and deficiencies in the number
of reported malformed sequences). (Gustavo)
#In trunk, added a public interface (php_next_utf8_char) to get_next_char's
#UTF-8 mode so that utf8_decode can use it.
#In PHP 5.3, trunk's get_next_char was copied into xml.c instead, because 5.3's
#own get_next_char differs and does not recover properly from decoding errors.
-rw-r--r-- | ext/standard/html.c | 16 | ||||
-rw-r--r-- | ext/standard/html.h | 1 | ||||
-rw-r--r-- | ext/xml/tests/bug49687.phpt | 24 | ||||
-rw-r--r-- | ext/xml/xml.c | 40 |
4 files changed, 49 insertions, 32 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index 354e18bfec..de763cf72a 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -92,9 +92,9 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring) /* {{{ get_next_char */ -static unsigned int get_next_char( +static inline unsigned int get_next_char( enum entity_charset charset, - unsigned char *str, + const unsigned char *str, size_t str_len, size_t *cursor, int *status) @@ -352,6 +352,18 @@ static unsigned int get_next_char( } /* }}} */ +/* {{{ php_next_utf8_char + * Public interface for get_next_char used with UTF-8 */ + PHPAPI unsigned int php_next_utf8_char( + const unsigned char *str, + size_t str_len, + size_t *cursor, + int *status) +{ + return get_next_char(cs_utf_8, str, str_len, cursor, status); +} +/* }}} */ + /* {{{ entity_charset determine_charset * returns the charset identifier based on current locale or a hint. * defaults to UTF-8 */ diff --git a/ext/standard/html.h b/ext/standard/html.h index 4915e171cb..8d9efc406e 100644 --- a/ext/standard/html.h +++ b/ext/standard/html.h @@ -57,5 +57,6 @@ PHP_FUNCTION(get_html_translation_table); PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC); PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC); PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC); +PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status); #endif /* HTML_H */ diff --git a/ext/xml/tests/bug49687.phpt b/ext/xml/tests/bug49687.phpt new file mode 100644 index 0000000000..3ff19cee7e --- /dev/null +++ b/ext/xml/tests/bug49687.phpt @@ -0,0 +1,24 @@ +--TEST--
+Bug #49687 Several utf8_decode deficiencies and vulnerabilities
+--SKIPIF--
+<?php
+require_once("skipif.inc");
+if (!extension_loaded('xml')) die ("skip xml extension not available");
+?>
+--FILE--
+<?php
+
+$tests = array(
+ "\x41\xC2\x3E\x42",
+ "\xE3\x80\x22",
+ "\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98",
+);
+foreach ($tests as $t) {
+ echo bin2hex(utf8_decode($t)), "\n";
+}
+echo "Done.\n";
+--EXPECT--
+413f3e42
+3f22
+413f3f423f433f3f
+Done.
diff --git a/ext/xml/xml.c b/ext/xml/xml.c index 72729d6de5..54e03d5e63 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -32,6 +32,7 @@ #include "zend_variables.h" #include "ext/standard/php_string.h" #include "ext/standard/info.h" +#include "ext/standard/html.h" #if HAVE_XML @@ -662,7 +663,7 @@ PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char /* {{{ xml_utf8_decode */ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) { - int pos = len; + size_t pos = 0; char *newbuf = emalloc(len + 1); unsigned int c; char (*decoder)(unsigned short) = NULL; @@ -681,36 +682,15 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_ newbuf[*newlen] = '\0'; return newbuf; } - while (pos > 0) { - c = (unsigned char)(*s); - if (c >= 0xf0) { /* four bytes encoded, 21 bits */ - if(pos-4 >= 0) { - c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63); - } else { - c = '?'; - } - s += 4; - pos -= 4; - } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */ - if(pos-3 >= 0) { - c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63); - } else { - c = '?'; - } - s += 3; - pos -= 3; - } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */ - if(pos-2 >= 0) { - c = ((s[0]&63)<<6) | (s[1]&63); - } else { - c = '?'; - } - s += 2; - pos -= 2; - } else { - s++; - pos--; + + while (pos < (size_t)len) { + int status = FAILURE; + c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); + + if (status == FAILURE || c > 0xFFU) { + c = '?'; } + newbuf[*newlen] = decoder ? decoder(c) : c; ++*newlen; } |