summary | refs | log | tree | commit | diff
path: root/ext/xml
diff options
context:
space:
mode:
author: Gustavo André dos Santos Lopes <cataphract@php.net> 2010-10-27 18:13:25 +0000
committer: Gustavo André dos Santos Lopes <cataphract@php.net> 2010-10-27 18:13:25 +0000
commit: e69b1ff2c43fbfd672097e9f8b25dead81f34ccf (patch)
tree: 50dd9955cf078bbeefd120d86d09aec99c9db7cd /ext/xml
parent: da400e7500e236a332b8104b373e60e842bbd63e (diff)
download: php-git-e69b1ff2c43fbfd672097e9f8b25dead81f34ccf.tar.gz
- Fixed bug #49687 (utf8_decode vulnerabilities and deficiencies in the number
  of reported malformed sequences). (Gustavo)
#Made a public interface for get_next_char/utf-8 in trunk to use in utf8_decode.
#In PHP 5.3, trunk's get_next_char was copied to xml.c because 5.3's
#get_next_char is different and is not prepared to recover appropriately from
#errors.
Diffstat (limited to 'ext/xml')
-rw-r--r-- ext/xml/tests/bug49687.phpt 24
-rw-r--r-- ext/xml/xml.c 40
2 files changed, 34 insertions, 30 deletions
diff --git a/ext/xml/tests/bug49687.phpt b/ext/xml/tests/bug49687.phpt
new file mode 100644
index 0000000000..3ff19cee7e
--- /dev/null
+++ b/ext/xml/tests/bug49687.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Bug #49687 Several utf8_decode deficiencies and vulnerabilities
+--SKIPIF--
+<?php
+require_once("skipif.inc");
+if (!extension_loaded('xml')) die ("skip xml extension not available");
+?>
+--FILE--
+<?php
+
+$tests = array(
+ "\x41\xC2\x3E\x42",
+ "\xE3\x80\x22",
+ "\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98",
+);
+foreach ($tests as $t) {
+ echo bin2hex(utf8_decode($t)), "\n";
+}
+echo "Done.\n";
+--EXPECT--
+413f3e42
+3f22
+413f3f423f433f3f
+Done.
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index 72729d6de5..54e03d5e63 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -32,6 +32,7 @@
#include "zend_variables.h"
#include "ext/standard/php_string.h"
#include "ext/standard/info.h"
+#include "ext/standard/html.h"
#if HAVE_XML
@@ -662,7 +663,7 @@ PHPAPI char *xml_utf8_encode(const char *s, int len, int *newlen, const XML_Char
/* {{{ xml_utf8_decode */
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding)
{
- int pos = len;
+ size_t pos = 0;
char *newbuf = emalloc(len + 1);
unsigned int c;
char (*decoder)(unsigned short) = NULL;
@@ -681,36 +682,15 @@ PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_
newbuf[*newlen] = '\0';
return newbuf;
}
- while (pos > 0) {
- c = (unsigned char)(*s);
- if (c >= 0xf0) { /* four bytes encoded, 21 bits */
- if(pos-4 >= 0) {
- c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63);
- } else {
- c = '?';
- }
- s += 4;
- pos -= 4;
- } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */
- if(pos-3 >= 0) {
- c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63);
- } else {
- c = '?';
- }
- s += 3;
- pos -= 3;
- } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */
- if(pos-2 >= 0) {
- c = ((s[0]&63)<<6) | (s[1]&63);
- } else {
- c = '?';
- }
- s += 2;
- pos -= 2;
- } else {
- s++;
- pos--;
+
+ while (pos < (size_t)len) {
+ int status = FAILURE;
+ c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
+
+ if (status == FAILURE || c > 0xFFU) {
+ c = '?';
}
+
newbuf[*newlen] = decoder ? decoder(c) : c;
++*newlen;
}