From 668626385ccad937dc62a2d6c98988ede64778ca Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Wed, 18 Jul 2018 19:04:36 -0400 Subject: Preserve XML namespaces when they are introduced inside an XML document, not just the ones introduced at the top level. [bug=1718787] --- NEWS.txt | 3 +++ bs4/builder/_lxml.py | 10 +++++----- bs4/testing.py | 11 +++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index 52edd4c..47b2e70 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -3,6 +3,9 @@ * Stop data loss when encountering an empty numeric entity, and possibly in other cases. Thanks to tos.kamiya for the fix. [bug=1698503] +* Preserve XML namespaces introduced inside an XML document, not just + the ones introduced at the top level. [bug=1718787] + * Improved the warning given when no parser is specified. [bug=1780571] * Fixed code that was causing deprecation warnings in recent Python 3 diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py index 3439271..4a0f7de 100644 --- a/bs4/builder/_lxml.py +++ b/bs4/builder/_lxml.py @@ -151,11 +151,11 @@ class LXMLTreeBuilderForXML(TreeBuilder): attrs = dict(attrs) nsprefix = None # Invert each namespace map as it comes in. - if len(self.nsmaps) > 1: - # There are no new namespaces for this tag, but - # non-default namespaces are in play, so we need a - # separate tag stack to know when they end. - self.nsmaps.append(None) + if len(nsmap) == 0 and len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. + self.nsmaps.append(None) elif len(nsmap) > 0: # A new namespace mapping has come into play. inverted_nsmap = dict((value, key) for key, value in nsmap.items()) diff --git a/bs4/testing.py b/bs4/testing.py index 9d42702..641663c 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -624,6 +624,17 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual( soup.encode("utf-8"), markup) + def test_nested_namespaces(self): + doc = """ + + + + + +""" + soup = BeautifulSoup(doc, "lxml-xml") + self.assertEqual(doc, soup.encode()) + def test_formatter_processes_script_tag_for_xml_documents(self): doc = """