diff options
author | Isaac Muse <isaacmuse@gmail.com> | 2019-01-06 14:08:32 -0700 |
---|---|---|
committer | Isaac Muse <isaacmuse@gmail.com> | 2019-01-06 14:08:32 -0700 |
commit | de264e1294a5e2a9a00685e729f2eb0ec5129330 (patch) | |
tree | 30ca6313e245fd6e30d601ecb218e2f84812d878 | |
parent | 48c7ad53dade2827e89984ff40561f2d70bb6e46 (diff) | |
download | beautifulsoup4-de264e1294a5e2a9a00685e729f2eb0ec5129330.tar.gz |
Don't track un-prefixed namespaces
-rw-r--r-- | bs4/builder/_lxml.py | 12 | ||||
-rw-r--r-- | bs4/tests/test_lxml.py | 18 |
2 files changed, 24 insertions, 6 deletions
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index 3a1700c..b7e172c 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -71,12 +71,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         This might be useful later on when creating CSS selectors.
         """
         for key, value in mapping.items():
-            if key not in self.soup._namespaces:
-                # Let the BeautifulSoup object know about a new namespace.
-                # If there are multiple namespaces defined with the same
-                # prefix, the first one in the document takes precedence.
-                self.soup._namespaces[key] = value
-
+            if key and key not in self.soup._namespaces:
+                # Let the BeautifulSoup object know about a new namespace.
+                # If there are multiple namespaces defined with the same
+                # prefix, the first one in the document takes precedence.
+                self.soup._namespaces[key] = value
+
     def default_parser(self, encoding):
         # This can either return a parser object or a class, which
         # will be instantiated with default arguments.
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
index 8a8f690..1a4f27c 100644
--- a/bs4/tests/test_lxml.py
+++ b/bs4/tests/test_lxml.py
@@ -80,3 +80,21 @@ class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
     @property
     def default_builder(self):
         return LXMLTreeBuilderForXML()
+
+    def test_namespace_indexing(self):
+        # We should not track un-prefixed namespaces as we can only hold one
+        # and it will be recognized as the default namespace by soupsieve,
+        # which may be confusing in some situations. When no namespace is provided
+        # for a selector, the default namespace (if defined) is assumed.
+
+        soup = self.soup(
+            '<?xml version="1.1"?>\n'
+            '<root>'
+            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
+            '<prefix:tag xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag>'
+            '</root>'
+        )
+        self.assertEqual(
+            soup._namespaces,
+            {'xml': 'http://www.w3.org/XML/1998/namespace', 'prefix': 'http://prefixed-namespace.com'}
+        )