diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lxml/sax.py | 37 | ||||
-rw-r--r-- | src/lxml/tests/test_sax.py | 139 |
2 files changed, 166 insertions, 10 deletions
diff --git a/src/lxml/sax.py b/src/lxml/sax.py index 1d491c66..04c23922 100644 --- a/src/lxml/sax.py +++ b/src/lxml/sax.py @@ -191,7 +191,7 @@ class ElementTreeProducer(object): self._content_handler.endDocument() - def _recursive_saxify(self, element, prefixes): + def _recursive_saxify(self, element, parent_nsmap): content_handler = self._content_handler tag = element.tag if tag is Comment or tag is ProcessingInstruction: @@ -202,7 +202,14 @@ class ElementTreeProducer(object): content_handler.characters(element.tail) return + element_nsmap = element.nsmap new_prefixes = [] + if element_nsmap != parent_nsmap: + # There have been updates to the namespace + for prefix, ns_uri in element_nsmap.items(): + if parent_nsmap.get(prefix) != ns_uri: + new_prefixes.append( (prefix, ns_uri) ) + build_qname = self._build_qname attribs = element.items() if attribs: @@ -212,13 +219,15 @@ class ElementTreeProducer(object): attr_ns_tuple = _getNsTag(attr_ns_name) attr_values[attr_ns_tuple] = value attr_qnames[attr_ns_tuple] = build_qname( - attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes) + attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap, + None, True) sax_attributes = self._attr_class(attr_values, attr_qnames) else: sax_attributes = self._empty_attributes ns_uri, local_name = _getNsTag(tag) - qname = build_qname(ns_uri, local_name, prefixes, new_prefixes) + qname = build_qname(ns_uri, local_name, element_nsmap, element.prefix, + False) for prefix, uri in new_prefixes: content_handler.startPrefixMapping(prefix, uri) @@ -227,23 +236,31 @@ class ElementTreeProducer(object): if element.text: content_handler.characters(element.text) for child in element: - self._recursive_saxify(child, prefixes) + self._recursive_saxify(child, element_nsmap) content_handler.endElementNS((ns_uri, local_name), qname) for prefix, uri in new_prefixes: content_handler.endPrefixMapping(prefix) if element.tail: content_handler.characters(element.tail) - def _build_qname(self, ns_uri, local_name, 
prefixes, new_prefixes): + def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, + is_attribute): if ns_uri is None: return local_name - try: - prefix = prefixes[ns_uri] - except KeyError: - prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes) - new_prefixes.append( (prefix, ns_uri) ) + + if nsmap.get(preferred_prefix) == ns_uri and not is_attribute: + prefix = preferred_prefix + else: + # Pick the first matching prefix: + prefix = min(pfx for (pfx, uri) in nsmap.items() + if pfx is not None and uri == ns_uri) + + if prefix is None: + # Default namespace + return local_name return prefix + ':' + local_name + def saxify(element_or_tree, content_handler): """One-shot helper to generate SAX events from an XML tree and fire them against a SAX ContentHandler. diff --git a/src/lxml/tests/test_sax.py b/src/lxml/tests/test_sax.py index 5b1b3089..adc5e736 100644 --- a/src/lxml/tests/test_sax.py +++ b/src/lxml/tests/test_sax.py @@ -13,6 +13,7 @@ if this_dir not in sys.path: from common_imports import HelperTestCase, make_doctest, BytesIO, _bytes from lxml import sax from xml.dom import pulldom +from xml.sax.handler import ContentHandler class ETreeSaxTestCase(HelperTestCase): @@ -87,6 +88,8 @@ class ETreeSaxTestCase(HelperTestCase): dom.firstChild.localName) self.assertEqual('blaA', dom.firstChild.namespaceURI) + self.assertEqual(None, + dom.firstChild.prefix) children = dom.firstChild.childNodes self.assertEqual('ab', @@ -96,6 +99,33 @@ class ETreeSaxTestCase(HelperTestCase): self.assertEqual('ba', children[2].nodeValue) + def test_sax_to_pulldom_multiple_namespaces(self): + tree = self.parse('<a xmlns="blaA" xmlns:a="blaA"></a>') + handler = pulldom.SAX2DOM() + sax.saxify(tree, handler) + dom = handler.document + + # With multiple prefix definitions, the node should keep the one + # that was actually used, even if the others also are valid. 
+ self.assertEqual('a', + dom.firstChild.localName) + self.assertEqual('blaA', + dom.firstChild.namespaceURI) + self.assertEqual(None, + dom.firstChild.prefix) + + tree = self.parse('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>') + handler = pulldom.SAX2DOM() + sax.saxify(tree, handler) + dom = handler.document + + self.assertEqual('a', + dom.firstChild.localName) + self.assertEqual('blaA', + dom.firstChild.namespaceURI) + self.assertEqual('a', + dom.firstChild.prefix) + def test_element_sax(self): tree = self.parse('<a><b/></a>') a = tree.getroot() @@ -267,9 +297,118 @@ class ETreeSaxTestCase(HelperTestCase): return f.getvalue().replace(_bytes('\n'), _bytes('')) +class SimpleContentHandler(ContentHandler, object): + """A SAX content handler that just stores the events""" + + def __init__(self): + self.sax_events = [] + super(SimpleContentHandler, self).__init__() + + def startDocument(self): + self.sax_events.append(('startDocument',)) + + def endDocument(self): + self.sax_events.append(('endDocument',)) + + def startPrefixMapping(self, prefix, uri): + self.sax_events.append(('startPrefixMapping', prefix, uri)) + + def endPrefixMapping(self, prefix): + self.sax_events.append(('endPrefixMapping', prefix)) + + def startElement(self, name, attrs): + self.sax_events.append(('startElement', name, dict(attrs))) + + def endElement(self, name): + self.sax_events.append(('endElement', name)) + + def startElementNS(self, name, qname, attrs): + self.sax_events.append(('startElementNS', name, qname, attrs._qnames)) + + def endElementNS(self, name, qname): + self.sax_events.append(('endElementNS', name, qname)) + + def characters(self, content): + self.sax_events.append(('characters', content)) + + def ignorableWhitespace(self, whitespace): + self.sax_events.append(('ignorableWhitespace', whitespace)) + + def processingInstruction(self, target, data): + self.sax_events.append(('processingInstruction', target, data)) + + def skippedEntity(self, name): + 
self.sax_events.append(('skippedEntity', name)) + + +class NSPrefixSaxTestCase(HelperTestCase): + """Testing that namespaces generate the right SAX events""" + + def _saxify(self, tree): + handler = SimpleContentHandler() + sax.ElementTreeProducer(tree, handler).saxify() + return handler.sax_events + + def test_element_sax_ns_prefix(self): + # The name of the prefix should be preserved, if the uri is unique + tree = self.parse('<a:a xmlns:a="blaA" xmlns:c="blaC">' + '<d a:attr="value" c:attr="value" /></a:a>') + a = tree.getroot() + + self.assertEqual( + [('startElementNS', ('blaA', 'a'), 'a:a', {}), + ('startElementNS', (None, 'd'), 'd', + {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}), + ('endElementNS', (None, 'd'), 'd'), + ('endElementNS', ('blaA', 'a'), 'a:a'), + ], + self._saxify(a)[3:7]) + + def test_element_sax_default_ns_prefix(self): + # Default prefixes should also not get a generated prefix + tree = self.parse('<a xmlns="blaA"><b attr="value" /></a>') + a = tree.getroot() + + self.assertEqual( + [('startDocument',), + # NS prefix should be None: + ('startPrefixMapping', None, 'blaA'), + ('startElementNS', ('blaA', 'a'), 'a', {}), + # Attribute prefix should be None: + ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}), + ('endElementNS', ('blaA', 'b'), 'b'), + ('endElementNS', ('blaA', 'a'), 'a'), + # Prefix should be None again: + ('endPrefixMapping', None), + ('endDocument',)], + self._saxify(a)) + + # Except for attributes, if there is both a default namespace + # and a named namespace with the same uri + tree = self.parse('<a xmlns="bla" xmlns:a="bla">' + '<b a:attr="value" /></a>') + a = tree.getroot() + + self.assertEqual( + ('startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'}), + self._saxify(a)[4]) + + def test_element_sax_twin_ns_prefix(self): + # Make an element with a doubly registered uri + tree = self.parse('<a xmlns:b="bla" xmlns:c="bla">' + '<d c:attr="attr" /></a>') + a = tree.getroot() + + 
self.assertEqual( + # It should get the b prefix in this case + ('startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'}), + self._saxify(a)[4]) + + def test_suite(): suite = unittest.TestSuite() suite.addTests([unittest.makeSuite(ETreeSaxTestCase)]) + suite.addTests([unittest.makeSuite(NSPrefixSaxTestCase)]) suite.addTests( [make_doctest('../../../doc/sax.txt')]) return suite |