diff options
author | R David Murray <rdmurray@bitdance.com> | 2012-04-13 21:27:19 -0400 |
---|---|---|
committer | R David Murray <rdmurray@bitdance.com> | 2012-04-13 21:27:19 -0400 |
commit | ea7bc055b08fb442986e0735b1b5b325b0b51ecc (patch) | |
tree | 20c2b837c787daf7e95ece075e6544627b17625a /Lib/xml/etree/ElementTree.py | |
parent | c32365d2ce552d1d17279e30a564509235ffb1db (diff) | |
parent | c2c9c905706160bf34aea12f2348210aac3e0da2 (diff) | |
download | cpython-ea7bc055b08fb442986e0735b1b5b325b0b51ecc.tar.gz |
Merge #14399: corrected news item
Diffstat (limited to 'Lib/xml/etree/ElementTree.py')
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 185 |
1 files changed, 135 insertions, 50 deletions
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index f94c48c09c..5f974f65b0 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -68,8 +68,9 @@ __all__ = [ "tostring", "tostringlist", "TreeBuilder", "VERSION", - "XML", + "XML", "XMLID", "XMLParser", "XMLTreeBuilder", + "register_namespace", ] VERSION = "1.3.0" @@ -100,7 +101,6 @@ import sys import re import warnings - class _SimpleElementPath: # emulate pre-1.2 find/findtext/findall behaviour def find(self, element, tag, namespaces=None): @@ -148,9 +148,9 @@ class ParseError(SyntaxError): # @defreturn flag def iselement(element): - # FIXME: not sure about this; might be a better idea to look - # for tag/attrib/text attributes - return isinstance(element, Element) or hasattr(element, "tag") + # FIXME: not sure about this; + # isinstance(element, Element) or look for tag/attrib/text attributes + return hasattr(element, 'tag') ## # Element class. This class defines the Element interface, and @@ -298,7 +298,7 @@ class Element: # @param element The element to add. def append(self, element): - # assert iselement(element) + self._assert_is_element(element) self._children.append(element) ## @@ -308,8 +308,8 @@ class Element: # @since 1.3 def extend(self, elements): - # for element in elements: - # assert iselement(element) + for element in elements: + self._assert_is_element(element) self._children.extend(elements) ## @@ -318,9 +318,13 @@ class Element: # @param index Where to insert the new subelement. def insert(self, index, element): - # assert iselement(element) + self._assert_is_element(element) self._children.insert(index, element) + def _assert_is_element(self, e): + if not isinstance(e, Element): + raise TypeError('expected an Element, not %s' % type(e).__name__) + ## # Removes a matching subelement. Unlike the <b>find</b> methods, # this method compares elements based on identity, not on tag @@ -1085,6 +1089,8 @@ _namespace_map = { # dublin core "http://purl.org/dc/elements/1.1/": "dc", } +# For tests and troubleshooting +register_namespace._namespace_map = _namespace_map def _raise_serialization_error(text): raise TypeError( @@ -1509,24 +1515,30 @@ class XMLParser: self.target = self._target = target self._error = expat.error self._names = {} # name memo cache - # callbacks + # main callbacks parser.DefaultHandlerExpand = self._default - parser.StartElementHandler = self._start - parser.EndElementHandler = self._end - parser.CharacterDataHandler = self._data - # optional callbacks - parser.CommentHandler = self._comment - parser.ProcessingInstructionHandler = self._pi + if hasattr(target, 'start'): + parser.StartElementHandler = self._start + if hasattr(target, 'end'): + parser.EndElementHandler = self._end + if hasattr(target, 'data'): + parser.CharacterDataHandler = target.data + # miscellaneous callbacks + if hasattr(target, 'comment'): + parser.CommentHandler = target.comment + if hasattr(target, 'pi'): + parser.ProcessingInstructionHandler = target.pi # let expat do the buffering, if supported try: - self._parser.buffer_text = 1 + parser.buffer_text = 1 except AttributeError: pass # use new-style attribute handling, if supported try: - self._parser.ordered_attributes = 1 - self._parser.specified_attributes = 1 - parser.StartElementHandler = self._start_list + parser.ordered_attributes = 1 + parser.specified_attributes = 1 + if hasattr(target, 'start'): + parser.StartElementHandler = self._start_list except AttributeError: pass self._doctype = None @@ -1570,44 +1582,29 @@ class XMLParser: attrib[fixname(attrib_in[i])] = attrib_in[i+1] return self.target.start(tag, attrib) - def _data(self, text): - return self.target.data(text) - def _end(self, tag): return self.target.end(self._fixname(tag)) - def _comment(self, data): - try: - comment = self.target.comment - except AttributeError: - pass - else: - return comment(data) - - def _pi(self, target, data): - try: - pi = self.target.pi - except AttributeError: - pass - else: - return pi(target, data) - def _default(self, text): prefix = text[:1] if prefix == "&": # deal with undefined entities try: - self.target.data(self.entity[text[1:-1]]) + data_handler = self.target.data + except AttributeError: + return + try: + data_handler(self.entity[text[1:-1]]) except KeyError: from xml.parsers import expat err = expat.error( "undefined entity %s: line %d, column %d" % - (text, self._parser.ErrorLineNumber, - self._parser.ErrorColumnNumber) + (text, self.parser.ErrorLineNumber, + self.parser.ErrorColumnNumber) ) err.code = 11 # XML_ERROR_UNDEFINED_ENTITY - err.lineno = self._parser.ErrorLineNumber - err.offset = self._parser.ErrorColumnNumber + err.lineno = self.parser.ErrorLineNumber + err.offset = self.parser.ErrorColumnNumber raise err elif prefix == "<" and text[:9] == "<!DOCTYPE": self._doctype = [] # inside a doctype declaration @@ -1634,7 +1631,7 @@ class XMLParser: pubid = pubid[1:-1] if hasattr(self.target, "doctype"): self.target.doctype(name, pubid, system[1:-1]) - elif self.doctype is not self._XMLParser__doctype: + elif self.doctype != self._XMLParser__doctype: # warn about deprecated call self._XMLParser__doctype(name, pubid, system[1:-1]) self.doctype(name, pubid, system[1:-1]) @@ -1665,7 +1662,7 @@ class XMLParser: def feed(self, data): try: - self._parser.Parse(data, 0) + self.parser.Parse(data, 0) except self._error as v: self._raiseerror(v) @@ -1677,12 +1674,100 @@ class XMLParser: def close(self): try: - self._parser.Parse("", 1) # end of data + self.parser.Parse("", 1) # end of data except self._error as v: self._raiseerror(v) - tree = self.target.close() - del self.target, self._parser # get rid of circular references - return tree + try: + close_handler = self.target.close + except AttributeError: + pass + else: + return close_handler() + finally: + # get rid of circular references + del self.parser, self._parser + del self.target, self._target + + +# Import the C accelerators +try: + # Element, SubElement, ParseError, TreeBuilder, XMLParser + from _elementtree import * +except ImportError: + pass +else: + # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser + + class ElementTree(ElementTree): + def parse(self, source, parser=None): + close_source = False + if not hasattr(source, 'read'): + source = open(source, 'rb') + close_source = True + try: + if parser is not None: + while True: + data = source.read(65536) + if not data: + break + parser.feed(data) + self._root = parser.close() + else: + parser = XMLParser() + self._root = parser._parse(source) + return self._root + finally: + if close_source: + source.close() + + class iterparse: + root = None + def __init__(self, file, events=None): + self._close_file = False + if not hasattr(file, 'read'): + file = open(file, 'rb') + self._close_file = True + self._file = file + self._events = [] + self._index = 0 + self._error = None + self.root = self._root = None + b = TreeBuilder() + self._parser = XMLParser(b) + self._parser._setevents(self._events, events) + + def __next__(self): + while True: + try: + item = self._events[self._index] + self._index += 1 + return item + except IndexError: + pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + if self._close_file: + self._file.close() + raise StopIteration + # load event buffer + del self._events[:] + self._index = 0 + data = self._file.read(16384) + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc + else: + self._root = self._parser.close() + self._parser = None + + def __iter__(self): + return self # compatibility XMLTreeBuilder = XMLParser |