summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-12-27 17:26:05 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2019-12-27 17:26:05 +0100
commit 0810dcc7b4c125aa4564c3f0b797053f8541da24 (patch)
tree fa5a82e6e782ff846fc62b45562ed99160742a57
parent 551248f7fff4aeec8764811d707d4e51fadf99a8 (diff)
download python-lxml-0810dcc7b4c125aa4564c3f0b797053f8541da24.tar.gz
LP#1844674: Include tail text of comments and PIs in itertext() results (regression in lxml 4.4).
-rw-r--r-- src/lxml/etree.pyx 4
-rw-r--r-- src/lxml/tests/test_etree.py 11
2 files changed, 13 insertions, 2 deletions
diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
index db95f307..5f44df30 100644
--- a/src/lxml/etree.pyx
+++ b/src/lxml/etree.pyx
@@ -2966,9 +2966,9 @@ cdef class ElementTextIterator:
def __cinit__(self, _Element element not None, tag=None, *, bint with_tail=True):
_assertValidNode(element)
if with_tail:
- events = (u"start", u"end")
+ events = (u"start", u"comment", u"pi", u"end")
else:
- events = (u"start",)
+ events = (u"start", u"comment", u"pi")
self._start_element = element
self._nextEvent = iterwalk(element, events=events, tag=tag).__next__
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index afe5818b..027aae8a 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1448,6 +1448,17 @@ class ETreeOnlyTestCase(HelperTestCase):
[1,2,1,4],
counts)
+ def test_itertext_comment_pi(self):
+ # https://bugs.launchpad.net/lxml/+bug/1844674
+ XML = self.etree.XML
+ root = XML(_bytes(
+ "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
+ ))
+
+ text = list(root.itertext())
+ self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
+ text)
+
def test_resolve_string_dtd(self):
parse = self.etree.parse
parser = self.etree.XMLParser(dtd_validation=True)