From 0810dcc7b4c125aa4564c3f0b797053f8541da24 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Fri, 27 Dec 2019 17:26:05 +0100
Subject: LP#1844674: Include tail text of comments and PIs in itertext()
 results (regression in lxml 4.4).

---
 src/lxml/etree.pyx           |  4 ++--
 src/lxml/tests/test_etree.py | 11 +++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
index db95f307..5f44df30 100644
--- a/src/lxml/etree.pyx
+++ b/src/lxml/etree.pyx
@@ -2966,9 +2966,9 @@ cdef class ElementTextIterator:
     def __cinit__(self, _Element element not None, tag=None, *, bint with_tail=True):
         _assertValidNode(element)
         if with_tail:
-            events = (u"start", u"end")
+            events = (u"start", u"comment", u"pi", u"end")
         else:
-            events = (u"start",)
+            events = (u"start", u"comment", u"pi")
         self._start_element = element
         self._nextEvent = iterwalk(element, events=events, tag=tag).__next__
 
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index afe5818b..027aae8a 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -1448,6 +1448,17 @@ class ETreeOnlyTestCase(HelperTestCase):
             [1,2,1,4],
             counts)
 
+    def test_itertext_comment_pi(self):
+        # https://bugs.launchpad.net/lxml/+bug/1844674
+        XML = self.etree.XML
+        root = XML(_bytes(
+            "<root>RTEXT<a></a>ATAIL<b/><!-- COMMENT -->CTAIL<?PI PITEXT?> PITAIL </root>"
+        ))
+
+        text = list(root.itertext())
+        self.assertEqual(["RTEXT", "ATAIL", "CTAIL", " PITAIL "],
+                         text)
+
     def test_resolve_string_dtd(self):
         parse = self.etree.parse
         parser = self.etree.XMLParser(dtd_validation=True)
-- 
cgit v1.2.1