diff options
author | Leonard Richardson <leonardr@segfault.org> | 2018-07-18 19:04:36 -0400 |
---|---|---|
committer | Leonard Richardson <leonardr@segfault.org> | 2018-07-18 19:04:36 -0400 |
commit | 668626385ccad937dc62a2d6c98988ede64778ca (patch) | |
tree | ac3c6a8269772bfd67c70723a462b641152d4d27 | |
parent | cb8353294f2c0c0cd3bedacf36ecdf7bc710c979 (diff) | |
download | beautifulsoup4-668626385ccad937dc62a2d6c98988ede64778ca.tar.gz |
Preserve XML namespaces when they are introduced inside an XML
document, not just the ones introduced at the top level. [bug=1718787]
-rw-r--r-- | NEWS.txt | 3 | ||||
-rw-r--r-- | bs4/builder/_lxml.py | 10 | ||||
-rw-r--r-- | bs4/testing.py | 11 |
3 files changed, 19 insertions, 5 deletions
@@ -3,6 +3,9 @@
 * Stop data loss when encountering an empty numeric entity, and
   possibly in other cases. Thanks to tos.kamiya for the fix. [bug=1698503]
 
+* Preserve XML namespaces introduced inside an XML document, not just
+  the ones introduced at the top level. [bug=1718787]
+
 * Improved the warning given when no parser is specified. [bug=1780571]
 
 * Fixed code that was causing deprecation warnings in recent Python 3
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 3439271..4a0f7de 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -151,11 +151,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         attrs = dict(attrs)
         nsprefix = None
         # Invert each namespace map as it comes in.
-        if len(self.nsmaps) > 1:
-            # There are no new namespaces for this tag, but
-            # non-default namespaces are in play, so we need a
-            # separate tag stack to know when they end.
-            self.nsmaps.append(None)
+        if len(nsmap) == 0 and len(self.nsmaps) > 1:
+                # There are no new namespaces for this tag, but
+                # non-default namespaces are in play, so we need a
+                # separate tag stack to know when they end.
+                self.nsmaps.append(None)
         elif len(nsmap) > 0:
             # A new namespace mapping has come into play.
             inverted_nsmap = dict((value, key) for key, value in nsmap.items())
diff --git a/bs4/testing.py b/bs4/testing.py
index 9d42702..641663c 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -624,6 +624,17 @@ class XMLTreeBuilderSmokeTest(object):
         self.assertEqual(
             soup.encode("utf-8"), markup)
 
+    def test_nested_namespaces(self):
+        doc = """<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<parent xmlns="http://ns1/">
+<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
+<grandchild ns3:attr="value" xmlns="http://ns4/"/>
+</child>
+</parent>"""
+        soup = BeautifulSoup(doc, "lxml-xml")
+        self.assertEqual(doc, soup.encode())
+
     def test_formatter_processes_script_tag_for_xml_documents(self):
         doc = """
 <script type="text/javascript">