diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-12-27 17:26:05 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-12-27 17:26:05 +0100 |
commit | 0810dcc7b4c125aa4564c3f0b797053f8541da24 (patch) | |
tree | fa5a82e6e782ff846fc62b45562ed99160742a57 | |
parent | 551248f7fff4aeec8764811d707d4e51fadf99a8 (diff) | |
download | python-lxml-0810dcc7b4c125aa4564c3f0b797053f8541da24.tar.gz |
LP#1844674: Include tail text of comments and PIs in itertext() results (regression in lxml 4.4).
-rw-r--r-- | src/lxml/etree.pyx | 4 | ||||
-rw-r--r-- | src/lxml/tests/test_etree.py | 11 |
2 files changed, 13 insertions, 2 deletions
diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
index db95f307..5f44df30 100644
--- a/src/lxml/etree.pyx
+++ b/src/lxml/etree.pyx
@@ -2966,9 +2966,9 @@ cdef class ElementTextIterator:
     def __cinit__(self, _Element element not None, tag=None, *, bint with_tail=True):
         _assertValidNode(element)
         if with_tail:
-            events = (u"start", u"end")
+            events = (u"start", u"comment", u"pi", u"end")
         else:
-            events = (u"start",)
+            events = (u"start", u"comment", u"pi")
         self._start_element = element
         self._nextEvent = iterwalk(element, events=events, tag=tag).__next__
 
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index afe5818b..027aae8a 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1448,6 +1448,17 @@ class ETreeOnlyTestCase(HelperTestCase):
             [1,2,1,4],
             counts)
 
+    def test_itertext_comment_pi(self):
+        # https://bugs.launchpad.net/lxml/+bug/1844674
+        XML = self.etree.XML
+        root = XML(_bytes(
+            "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
+        ))
+
+        text = list(root.itertext())
+        self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
+                         text)
+
     def test_resolve_string_dtd(self):
         parse = self.etree.parse
         parser = self.etree.XMLParser(dtd_validation=True)