From e69b1ff2c43fbfd672097e9f8b25dead81f34ccf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gustavo=20Andr=C3=A9=20dos=20Santos=20Lopes?=
Date: Wed, 27 Oct 2010 18:13:25 +0000
Subject: - Fixed bug #49687 (utf8_decode vulnerabilities and deficiencies in the number of reported malformed sequences). (Gustavo)

#Made a public interface for get_next_char/utf-8 in trunk to use in utf8_decode.
#In PHP 5.3, trunk's get_next_char was copied to xml.c because 5.3's
#get_next_char is different and is not prepared to recover appropriately from
#errors.
---
 ext/standard/html.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'ext/standard/html.c')

diff --git a/ext/standard/html.c b/ext/standard/html.c
index 354e18bfec..de763cf72a 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -92,9 +92,9 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring)
 
 /* {{{ get_next_char
  */
-static unsigned int get_next_char(
+static inline unsigned int get_next_char(
 		enum entity_charset charset,
-		unsigned char *str,
+		const unsigned char *str,
 		size_t str_len,
 		size_t *cursor,
 		int *status)
@@ -352,6 +352,18 @@ static unsigned int get_next_char(
 }
 /* }}} */
 
+/* {{{ php_next_utf8_char
+ * Public interface for get_next_char used with UTF-8 */
+ PHPAPI unsigned int php_next_utf8_char(
+		const unsigned char *str,
+		size_t str_len,
+		size_t *cursor,
+		int *status)
+{
+	return get_next_char(cs_utf_8, str, str_len, cursor, status);
+}
+/* }}} */
+
 /* {{{ entity_charset determine_charset
  * returns the charset identifier based on current locale or a hint.
  * defaults to UTF-8 */
-- 
cgit v1.2.1