diff options
author | Christoph M. Becker <cmbecker69@gmx.de> | 2020-10-23 11:06:30 +0200 |
---|---|---|
committer | Christoph M. Becker <cmbecker69@gmx.de> | 2020-10-26 13:08:05 +0100 |
commit | 6d2bc7253018baa57487f622e706b8962c16d148 (patch) | |
tree | a90e90e3628f0b020b50fce0caec36148fcb0abe | |
parent | 824cbc27819f3f6dd6de24e182dbb7de6273b2ab (diff) | |
download | php-git-6d2bc7253018baa57487f622e706b8962c16d148.tar.gz |
Fix #80268: loadHTML() truncates at NUL bytes
libxml2 has no particular issues parsing HTML strings with NUL bytes;
these just cause truncation of the current text content, but parsing
continues generally. Since `::loadHTMLFile()` already supports NUL
bytes, `::loadHTML()` should as well.
Note that this is different from XML, which does not allow any NUL
bytes.
Closes GH-6368.
-rw-r--r-- | NEWS | 3 | ||||
-rw-r--r-- | ext/dom/document.c | 1 | ||||
-rw-r--r-- | ext/dom/tests/bug80268.phpt | 24 |
3 files changed, 27 insertions, 1 deletions
@@ -11,6 +11,9 @@ PHP NEWS
 - COM:
   . Fixed bug #62474 (com_event_sink crashes on certain arguments). (cmb)
 
+- DOM:
+  . Fixed bug #80268 (loadHTML() truncates at NUL bytes). (cmb)
+
 - IMAP:
   . Fixed bug #64076 (imap_sort() does not return FALSE on failure). (cmb)
   . Fixed bug #76618 (segfault on imap_reopen). (girgias)
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 22bb90d5d8..0e15e7a110 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -2024,7 +2024,6 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
 		}
 		ctxt = htmlCreateFileParserCtxt(source, NULL);
 	} else {
-		source_len = xmlStrlen((xmlChar *) source);
 		if (ZEND_SIZE_T_INT_OVFL(source_len)) {
 			php_error_docref(NULL, E_WARNING, "Input string is too long");
 			RETURN_FALSE;
diff --git a/ext/dom/tests/bug80268.phpt b/ext/dom/tests/bug80268.phpt
new file mode 100644
index 0000000000..0fe50b85e8
--- /dev/null
+++ b/ext/dom/tests/bug80268.phpt
@@ -0,0 +1,24 @@
+--TEST--
+Bug #80268 (loadHTML() truncates at NUL bytes)
+--SKIPIF--
+<?php require_once('skipif.inc'); ?>
+--FILE--
+<?php
+$doc = new DOMDocument;
+$doc->loadHTML("<p>foo\0bar</p>");
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+
+file_put_contents(__DIR__ . '/80268.html', "<p>foo\0bar</p>");
+$doc = new DOMDocument;
+$doc->loadHTMLFile(__DIR__ . '/80268.html');
+$html = $doc->saveHTML();
+var_dump(strpos($html, '<p>foo</p>') !== false);
+?>
+--CLEAN--
+<?php
+unlink(__DIR__ . '/80268.html');
+?>
+--EXPECT--
+bool(true)
+bool(true)