summary | refs | log | tree | commit | diff
path: root/ext
diff options
context:
space:
mode:
author Christoph M. Becker <cmbecker69@gmx.de> 2021-03-08 15:08:11 +0100
committer Christoph M. Becker <cmbecker69@gmx.de> 2021-03-08 15:15:59 +0100
commit 7931956805beba80188f3c0638c285f8fb75dfe1 (patch)
tree af72055b276b99f17d13d78a2c47d080e168596e /ext
parent 3880b8785be8cec394e7290e0ef4307be0ae596e (diff)
parent f901bec494ae921f36e1066e4380b92888757f0f (diff)
download php-git-7931956805beba80188f3c0638c285f8fb75dfe1.tar.gz
Merge branch 'PHP-7.4' into PHP-8.0
* PHP-7.4: Fix #51903: simplexml_load_file() doesn't use HTTP headers
Diffstat (limited to 'ext')
-rw-r--r-- ext/libxml/libxml.c 48
-rw-r--r-- ext/libxml/tests/bug51903.phpt 38
2 files changed, 86 insertions, 0 deletions
diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c
index 7dc4804905..fab8c3cf07 100644
--- a/ext/libxml/libxml.c
+++ b/ext/libxml/libxml.c
@@ -361,6 +361,54 @@ php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
return(NULL);
}
+	/* Check if there's been an external transport protocol with an encoding information */
+	if (enc == XML_CHAR_ENCODING_NONE) { /* a caller-supplied encoding is never overridden */
+		php_stream *s = (php_stream *) context;
+		if (Z_TYPE(s->wrapperdata) == IS_ARRAY) {
+			zval *header;
+			ZEND_HASH_FOREACH_VAL_IND(Z_ARRVAL(s->wrapperdata), header) {
+				const char buf[] = "Content-Type:";
+				if (Z_TYPE_P(header) == IS_STRING &&
+						!zend_binary_strncasecmp(Z_STRVAL_P(header), Z_STRLEN_P(header), buf, sizeof(buf)-1, sizeof(buf)-1)) {
+					char *needle = estrdup("charset=");
+					char *haystack = estrndup(Z_STRVAL_P(header), Z_STRLEN_P(header)); /* private copy; terminated in place below */
+					char *encoding = php_stristr(haystack, needle, Z_STRLEN_P(header), sizeof("charset=")-1);
+					if (encoding) {
+						char *end;
+						encoding += sizeof("charset=")-1;
+						if (*encoding == '"') { /* skip an opening quote */
+							encoding++;
+						}
+						end = strchr(encoding, ';'); /* the value stops at the next parameter... */
+						if (end == NULL) {
+							end = encoding + strlen(encoding); /* ...or at the end of the header */
+						}
+						end--; /* end == encoding-1 isn't a buffer underrun */
+						while (*end == ' ' || *end == '\t') { /* trim trailing blanks */
+							end--;
+						}
+						if (*end == '"') { /* drop a closing quote */
+							end--;
+						}
+						if (encoding >= end) { /* empty or one-character value: look at the next header */
+							efree(haystack); /* continue bypasses the efree() calls at the bottom, */
+							efree(needle); /* so release both copies here instead of leaking them */
+							continue;
+						}
+						*(end+1) = '\0';
+						enc = xmlParseCharEncoding(encoding);
+						if (enc <= XML_CHAR_ENCODING_NONE) { /* fold any non-positive result back to "none" */
+							enc = XML_CHAR_ENCODING_NONE;
+						}
+					}
+					efree(haystack);
+					efree(needle);
+					break; /* found content-type */
+				}
+			} ZEND_HASH_FOREACH_END();
+		}
+	}
+
/* Allocate the Input buffer front-end. */
ret = xmlAllocParserInputBuffer(enc);
if (ret != NULL) {
diff --git a/ext/libxml/tests/bug51903.phpt b/ext/libxml/tests/bug51903.phpt
new file mode 100644
index 0000000000..ebbca2068c
--- /dev/null
+++ b/ext/libxml/tests/bug51903.phpt
@@ -0,0 +1,38 @@
+--TEST--
+Bug #51903 (simplexml_load_file() doesn't use HTTP headers)
+--SKIPIF--
+<?php
+if (!extension_loaded('simplexml')) die('skip simplexml extension not available');
+if (@!include "./ext/standard/tests/http/server.inc") die('skip server.inc not available'); // @ silences the warning of a failed include
+http_server_skipif();
+?>
+--FILE--
+<?php
+require "./ext/standard/tests/http/server.inc";
+$responses = [ // the XML declarations carry no encoding; only the Content-Type header names ISO-8859-1
+ "data://text/plain,HTTP/1.1 200 OK\r\n"
+ . "Content-Type: text/xml; charset=ISO-8859-1\r\n\r\n" // case 1: charset is the last parameter
+ . "<?xml version=\"1.0\"?>\n"
+ . "<root>\xE4\xF6\xFC</root>\n", // raw single-byte payload; --EXPECT-- shows it as äöü
+ "data://text/plain,HTTP/1.1 200 OK\r\n"
+ . "Content-Type: text/xml; charset=ISO-8859-1; foo=bar\r\n\r\n" // case 2: another parameter follows the charset
+ . "<?xml version=\"1.0\"?>\n"
+ . "<root>\xE4\xF6\xFC</root>\n",
+ "data://text/plain,HTTP/1.1 200 OK\r\n"
+ . "Content-Type: text/xml; charset=\"ISO-8859-1\" ; foo=bar\r\n\r\n" // case 3: quoted value with a blank before the ';'
+ . "<?xml version=\"1.0\"?>\n"
+ . "<root>\xE4\xF6\xFC</root>\n",
+];
+['pid' => $pid, 'uri' => $uri] = http_server($responses);
+
+for ($i = 0; $i < count($responses); $i++) { // one request per entry; presumably the server answers them in order -- verify against server.inc
+ $sxe = simplexml_load_file($uri);
+ echo "$sxe\n"; // prints the element cast to a string
+}
+
+http_server_kill($pid);
+?>
+--EXPECT--
+äöü
+äöü
+äöü