summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2018-12-02 15:40:50 +0100
committer: GitHub <noreply@github.com> 2018-12-02 15:40:50 +0100
commit: 9ecef44311afe7082fdba124d5c1a688442b1854 (patch)
tree: c8424e9eaf9b8f68f5ad10d40bbc3bc06f048934
parent: 4432378cfc6d7bddb4cf9cac324606b9cae8647d (diff)
parent: 488286e179fc9b31df1570b4bca8d1ec9b1e4031 (diff)
download: python-lxml-9ecef44311afe7082fdba124d5c1a688442b1854.tar.gz
Merge pull request #267 from regebro/master
Let ElementTreeProducer use the available namespaces
-rw-r--r-- CHANGES.txt 10
-rw-r--r-- src/lxml/sax.py 37
-rw-r--r-- src/lxml/tests/test_sax.py 139
3 files changed, 173 insertions, 13 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 6501ec7a..09578bf6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -10,6 +10,10 @@ Features added
* The module ``lxml.sax`` is compiled using Cython in order to speed it up.
+* ElementTreeProducer now preserves the namespace prefixes. If two prefixes
+ point to the same URI, the first prefix in alphabetical order is used
+ for attributes.
+
* Updated ISO-Schematron implementation to 2013 version (now MIT licensed)
and the corresponding schema to the 2016 version (with optional "properties").
@@ -3893,16 +3897,16 @@ Features added
prefix to namespace URI mapping. This will create namespace
prefix declarations on these elements and these prefixes will show up
in XML serialization.
-
+
Bugs fixed
----------
-
+
* Killed yet another memory management related bug: trees created
using newDoc would not get a libxml2-level dictionary, which caused
problems when deallocating these documents later if they contained a
node that came from a document with a dictionary.
-* Moving namespaced elements between documents was problematic as
+* Moving namespaced elements between documents was problematic as
references to the original document would remain. This has been fixed
by applying xmlReconciliateNs() after each move operation.
diff --git a/src/lxml/sax.py b/src/lxml/sax.py
index 1d491c66..04c23922 100644
--- a/src/lxml/sax.py
+++ b/src/lxml/sax.py
@@ -191,7 +191,7 @@ class ElementTreeProducer(object):
self._content_handler.endDocument()
- def _recursive_saxify(self, element, prefixes):
+ def _recursive_saxify(self, element, parent_nsmap):
content_handler = self._content_handler
tag = element.tag
if tag is Comment or tag is ProcessingInstruction:
@@ -202,7 +202,14 @@ class ElementTreeProducer(object):
content_handler.characters(element.tail)
return
+ element_nsmap = element.nsmap
new_prefixes = []
+ if element_nsmap != parent_nsmap:
+ # There have been updates to the namespaces
+ for prefix, ns_uri in element_nsmap.items():
+ if parent_nsmap.get(prefix) != ns_uri:
+ new_prefixes.append( (prefix, ns_uri) )
+
build_qname = self._build_qname
attribs = element.items()
if attribs:
@@ -212,13 +219,15 @@ class ElementTreeProducer(object):
attr_ns_tuple = _getNsTag(attr_ns_name)
attr_values[attr_ns_tuple] = value
attr_qnames[attr_ns_tuple] = build_qname(
- attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
+ attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap,
+ None, True)
sax_attributes = self._attr_class(attr_values, attr_qnames)
else:
sax_attributes = self._empty_attributes
ns_uri, local_name = _getNsTag(tag)
- qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
+ qname = build_qname(ns_uri, local_name, element_nsmap, element.prefix,
+ False)
for prefix, uri in new_prefixes:
content_handler.startPrefixMapping(prefix, uri)
@@ -227,23 +236,31 @@ class ElementTreeProducer(object):
if element.text:
content_handler.characters(element.text)
for child in element:
- self._recursive_saxify(child, prefixes)
+ self._recursive_saxify(child, element_nsmap)
content_handler.endElementNS((ns_uri, local_name), qname)
for prefix, uri in new_prefixes:
content_handler.endPrefixMapping(prefix)
if element.tail:
content_handler.characters(element.tail)
- def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
+ def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix,
+ is_attribute):
if ns_uri is None:
return local_name
- try:
- prefix = prefixes[ns_uri]
- except KeyError:
- prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
- new_prefixes.append( (prefix, ns_uri) )
+
+ if nsmap.get(preferred_prefix) == ns_uri and not is_attribute:
+ prefix = preferred_prefix
+ else:
+ # Pick the first matching prefix:
+ prefix = min(pfx for (pfx, uri) in nsmap.items()
+ if pfx is not None and uri == ns_uri)
+
+ if prefix is None:
+ # Default namespace
+ return local_name
return prefix + ':' + local_name
+
def saxify(element_or_tree, content_handler):
"""One-shot helper to generate SAX events from an XML tree and fire
them against a SAX ContentHandler.
diff --git a/src/lxml/tests/test_sax.py b/src/lxml/tests/test_sax.py
index 5b1b3089..adc5e736 100644
--- a/src/lxml/tests/test_sax.py
+++ b/src/lxml/tests/test_sax.py
@@ -13,6 +13,7 @@ if this_dir not in sys.path:
from common_imports import HelperTestCase, make_doctest, BytesIO, _bytes
from lxml import sax
from xml.dom import pulldom
+from xml.sax.handler import ContentHandler
class ETreeSaxTestCase(HelperTestCase):
@@ -87,6 +88,8 @@ class ETreeSaxTestCase(HelperTestCase):
dom.firstChild.localName)
self.assertEqual('blaA',
dom.firstChild.namespaceURI)
+ self.assertEqual(None,
+ dom.firstChild.prefix)
children = dom.firstChild.childNodes
self.assertEqual('ab',
@@ -96,6 +99,33 @@ class ETreeSaxTestCase(HelperTestCase):
self.assertEqual('ba',
children[2].nodeValue)
+ def test_sax_to_pulldom_multiple_namespaces(self):
+ tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>')
+ handler = pulldom.SAX2DOM()
+ sax.saxify(tree, handler)
+ dom = handler.document
+
+ # With multiple prefix definitions, the node should keep the one
+ # that was actually used, even if the others also are valid.
+ self.assertEqual('a',
+ dom.firstChild.localName)
+ self.assertEqual('blaA',
+ dom.firstChild.namespaceURI)
+ self.assertEqual(None,
+ dom.firstChild.prefix)
+
+ tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>')
+ handler = pulldom.SAX2DOM()
+ sax.saxify(tree, handler)
+ dom = handler.document
+
+ self.assertEqual('a',
+ dom.firstChild.localName)
+ self.assertEqual('blaA',
+ dom.firstChild.namespaceURI)
+ self.assertEqual('a',
+ dom.firstChild.prefix)
+
def test_element_sax(self):
tree = self.parse('<a><b/></a>')
a = tree.getroot()
@@ -267,9 +297,118 @@ class ETreeSaxTestCase(HelperTestCase):
return f.getvalue().replace(_bytes('\n'), _bytes(''))
+class SimpleContentHandler(ContentHandler, object):
+ """A SAX content handler that just stores the events"""
+
+ def __init__(self):
+ self.sax_events = []
+ super(SimpleContentHandler, self).__init__()
+
+ def startDocument(self):
+ self.sax_events.append(('startDocument',))
+
+ def endDocument(self):
+ self.sax_events.append(('endDocument',))
+
+ def startPrefixMapping(self, prefix, uri):
+ self.sax_events.append(('startPrefixMapping', prefix, uri))
+
+ def endPrefixMapping(self, prefix):
+ self.sax_events.append(('endPrefixMapping', prefix))
+
+ def startElement(self, name, attrs):
+ self.sax_events.append(('startElement', name, dict(attrs)))
+
+ def endElement(self, name):
+ self.sax_events.append(('endElement', name))
+
+ def startElementNS(self, name, qname, attrs):
+ self.sax_events.append(('startElementNS', name, qname, attrs._qnames))
+
+ def endElementNS(self, name, qname):
+ self.sax_events.append(('endElementNS', name, qname))
+
+ def characters(self, content):
+ self.sax_events.append(('characters', content))
+
+ def ignorableWhitespace(self, whitespace):
+ self.sax_events.append(('ignorableWhitespace', whitespace))
+
+ def processingInstruction(self, target, data):
+ self.sax_events.append(('processingInstruction', target, data))
+
+ def skippedEntity(self, name):
+ self.sax_events.append(('skippedEntity', name))
+
+
+class NSPrefixSaxTestCase(HelperTestCase):
+ """Testing that namespaces generate the right SAX events"""
+
+ def _saxify(self, tree):
+ handler = SimpleContentHandler()
+ sax.ElementTreeProducer(tree, handler).saxify()
+ return handler.sax_events
+
+ def test_element_sax_ns_prefix(self):
+ # The name of the prefix should be preserved, if the uri is unique
+ tree = self.parse('<a:a xmlns:a="blaA" xmlns:c="blaC">'
+ '<d a:attr="value" c:attr="value" /></a:a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ [('startElementNS', ('blaA', 'a'), 'a:a', {}),
+ ('startElementNS', (None, 'd'), 'd',
+ {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
+ ('endElementNS', (None, 'd'), 'd'),
+ ('endElementNS', ('blaA', 'a'), 'a:a'),
+ ],
+ self._saxify(a)[3:7])
+
+ def test_element_sax_default_ns_prefix(self):
+ # Default prefixes should also not get a generated prefix
+ tree = self.parse('<a xmlns="blaA"><b attr="value" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ [('startDocument',),
+ # NS prefix should be None:
+ ('startPrefixMapping', None, 'blaA'),
+ ('startElementNS', ('blaA', 'a'), 'a', {}),
+ # Attribute prefix should be None:
+ ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
+ ('endElementNS', ('blaA', 'b'), 'b'),
+ ('endElementNS', ('blaA', 'a'), 'a'),
+ # Prefix should be None again:
+ ('endPrefixMapping', None),
+ ('endDocument',)],
+ self._saxify(a))
+
+ # Except for attributes, if there is both a default namespace
+ # and a named namespace with the same uri
+ tree = self.parse('<a xmlns="bla" xmlns:a="bla">'
+ '<b a:attr="value" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ ('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}),
+ self._saxify(a)[4])
+
+ def test_element_sax_twin_ns_prefix(self):
+ # Make an element with a doubly registered uri
+ tree = self.parse('<a xmlns:b="bla" xmlns:c="bla">'
+ '<d c:attr="attr" /></a>')
+ a = tree.getroot()
+
+ self.assertEqual(
+ # It should get the b prefix in this case
+ ('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}),
+ self._saxify(a)[4])
+
+
def test_suite():
suite = unittest.TestSuite()
suite.addTests([unittest.makeSuite(ETreeSaxTestCase)])
+ suite.addTests([unittest.makeSuite(NSPrefixSaxTestCase)])
suite.addTests(
[make_doctest('../../../doc/sax.txt')])
return suite