summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christoph M. Becker <cmbecker69@gmx.de> 2021-03-03 19:23:39 +0100
committer Christoph M. Becker <cmbecker69@gmx.de> 2021-03-08 15:07:01 +0100
commit f901bec494ae921f36e1066e4380b92888757f0f (patch)
tree a6c11bd896ae5572f72a85b259ad1d587c30a0eb
parent 5787f91c55a7ebaeb34711d303cfc27f089f58b3 (diff)
download php-git-f901bec494ae921f36e1066e4380b92888757f0f.tar.gz
Fix #51903: simplexml_load_file() doesn't use HTTP headers
The `encoding` attribute of the XML declaration is optional; if it is missing, it is good practice to use external encoding information where available. Thus, we check the `charset` parameter of `Content-Type` headers, and see whether the encoding is supported.

We cater to trailing parameters and quoted-strings, but not to escaped backslashes and quotes in quoted-strings, since no known character encoding name contains these anyway.

Co-authored-by: Michael Wallner <mike@php.net>

Closes GH-6747.
-rw-r--r-- NEWS 3
-rw-r--r-- ext/libxml/libxml.c 48
-rw-r--r-- ext/libxml/tests/bug51903.phpt 38
3 files changed, 89 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 751b79be69..4ccac5378f 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,9 @@ PHP NEWS
. Fixed bug #80763 (msgfmt_format() does not accept DateTime references).
(cmb)
+- Libxml:
+ . Fixed bug #51903 (simplexml_load_file() doesn't use HTTP headers). (cmb)
+
- MySQLnd:
. Fixed bug #80713 (SegFault when disabling ATTR_EMULATE_PREPARES and
MySQL 8.0). (Nikita)
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index c024e16670..e21d6fdbbe 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -409,6 +409,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
return(NULL);
}
+	/*
+	 * The caller did not request an encoding (enc == XML_CHAR_ENCODING_NONE):
+	 * look for external encoding information supplied by the transport, i.e.
+	 * the `charset` parameter of a Content-Type header found in the stream's
+	 * wrapperdata array. Trailing parameters and a double-quoted value are
+	 * handled; escaped characters inside a quoted-string are not.
+	 */
+	if (enc == XML_CHAR_ENCODING_NONE) {
+		php_stream *s = (php_stream *) context;
+
+		if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
+			zval *header;
+
+			ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
+				const char buf[] = "Content-Type:";
+				if (Z_TYPE_P(header) == IS_STRING &&
+						!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
+					/*
+					 * Work on private copies: the header copy is NUL-terminated
+					 * in place below, and php_stristr() takes non-const
+					 * arguments (presumably it modifies them — TODO confirm).
+					 */
+					char *needle = estrdup("charset=");
+					char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header));
+					char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
+					int unusable_charset = 0;
+
+					if (encoding) {
+						char *end;
+
+						/* skip the parameter name and an optional opening quote */
+						encoding += sizeof("charset=")-1;
+						if (*encoding == '"') {
+							encoding++;
+						}
+						/* the value runs up to the next parameter or the end of the header */
+						end = strchr(encoding, ';');
+						if (end == NULL) {
+							end = encoding + strlen(encoding);
+						}
+						end--; /* end == encoding-1 isn't a buffer underrun */
+						/* strip trailing whitespace and an optional closing quote */
+						while (*end == ' ' || *end == '\t') {
+							end--;
+						}
+						if (*end == '"') {
+							end--;
+						}
+						if (encoding >= end) {
+							/* value is empty or a single character: ignore this header */
+							unusable_charset = 1;
+						} else {
+							*(end+1) = '\0';
+							enc = xmlParseCharEncoding(encoding);
+							/* anything not recognised falls back to "no encoding" */
+							if (enc <= XML_CHAR_ENCODING_NONE) {
+								enc = XML_CHAR_ENCODING_NONE;
+							}
+						}
+					}
+					/*
+					 * Single cleanup path: the copies must be released before
+					 * either leaving the loop or moving on to the next header.
+					 */
+					efree(haystack);
+					efree(needle);
+					if (!unusable_charset) {
+						break; /* found content-type */
+					}
+				}
+			} ZEND_HASH_FOREACH_END();
+		}
+	}
+
/* Allocate the Input buffer front-end. */
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {
diff --git a/ext/libxml/tests/bug51903.phpt b/ext/libxml/tests/bug51903.phpt
new file mode 100644
index 0000000000..36a4b55704
--- /dev/null
+++ b/ext/libxml/tests/bug51903.phpt
@@ -0,0 +1,38 @@
+--TEST--
+Bug #51903 (simplexml_load_file() doesn't use HTTP headers)
+--SKIPIF--
+<?php
+if (!extension_loaded('simplexml')) {
+    die('skip simplexml extension not available');
+}
+if (@!include "./ext/standard/tests/http/server.inc") {
+    die('skip server.inc not available');
+}
+http_server_skipif('tcp://127.0.0.1:12342');
+?>
+--FILE--
+<?php
+require "./ext/standard/tests/http/server.inc";
+
+// All canned responses share the status line and an XML document whose
+// declaration carries no encoding; only the Content-Type header differs.
+$statusLine = "data://text/plain,HTTP/1.1 200 OK\r\n";
+$document = "<?xml version=\"1.0\"?>\n"
+    . "<root>\xE4\xF6\xFC</root>\n";
+
+$responses = [
+    // plain charset parameter
+    $statusLine
+        . "Content-Type: text/xml; charset=ISO-8859-1\r\n\r\n"
+        . $document,
+    // charset followed by a further parameter
+    $statusLine
+        . "Content-Type: text/xml; charset=ISO-8859-1; foo=bar\r\n\r\n"
+        . $document,
+    // quoted charset with whitespace before the next parameter
+    $statusLine
+        . "Content-Type: text/xml; charset=\"ISO-8859-1\" ; foo=bar\r\n\r\n"
+        . $document,
+];
+$pid = http_server('tcp://127.0.0.1:12342', $responses);
+
+// Issue as many requests as there are canned responses.
+foreach ($responses as $unused) {
+    $sxe = simplexml_load_file('http://127.0.0.1:12342/');
+    echo "$sxe\n";
+}
+
+http_server_kill($pid);
+?>
+--EXPECT--
+äöü
+äöü
+äöü