summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Leonard Richardson <leonardr@segfault.org> 2018-07-18 19:04:36 -0400
committer: Leonard Richardson <leonardr@segfault.org> 2018-07-18 19:04:36 -0400
commit: 668626385ccad937dc62a2d6c98988ede64778ca (patch)
tree: ac3c6a8269772bfd67c70723a462b641152d4d27
parent: cb8353294f2c0c0cd3bedacf36ecdf7bc710c979 (diff)
download: beautifulsoup4-668626385ccad937dc62a2d6c98988ede64778ca.tar.gz
Preserve XML namespaces when they are introduced inside an XML
document, not just the ones introduced at the top level. [bug=1718787]
-rw-r--r-- NEWS.txt | 3
-rw-r--r-- bs4/builder/_lxml.py | 10
-rw-r--r-- bs4/testing.py | 11
3 files changed, 19 insertions, 5 deletions
diff --git a/NEWS.txt b/NEWS.txt
index 52edd4c..47b2e70 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -3,6 +3,9 @@
* Stop data loss when encountering an empty numeric entity, and
possibly in other cases. Thanks to tos.kamiya for the fix. [bug=1698503]
+* Preserve XML namespaces introduced inside an XML document, not just
+ the ones introduced at the top level. [bug=1718787]
+
* Improved the warning given when no parser is specified. [bug=1780571]
* Fixed code that was causing deprecation warnings in recent Python 3
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 3439271..4a0f7de 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -151,11 +151,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
attrs = dict(attrs)
nsprefix = None
# Invert each namespace map as it comes in.
- if len(self.nsmaps) > 1:
- # There are no new namespaces for this tag, but
- # non-default namespaces are in play, so we need a
- # separate tag stack to know when they end.
- self.nsmaps.append(None)
+ if len(nsmap) == 0 and len(self.nsmaps) > 1:
+ # There are no new namespaces for this tag, but
+ # non-default namespaces are in play, so we need a
+ # separate tag stack to know when they end.
+ self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
diff --git a/bs4/testing.py b/bs4/testing.py
index 9d42702..641663c 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -624,6 +624,17 @@ class XMLTreeBuilderSmokeTest(object):
self.assertEqual(
soup.encode("utf-8"), markup)
+ def test_nested_namespaces(self):
+ doc = """<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<parent xmlns="http://ns1/">
+<child xmlns="http://ns2/" xmlns:ns3="http://ns3/">
+<grandchild ns3:attr="value" xmlns="http://ns4/"/>
+</child>
+</parent>"""
+ soup = BeautifulSoup(doc, "lxml-xml")
+ self.assertEqual(doc, soup.encode())
+
def test_formatter_processes_script_tag_for_xml_documents(self):
doc = """
<script type="text/javascript">