diff options
Diffstat (limited to 'Lib/xml')
-rw-r--r-- | Lib/xml/dom/xmlbuilder.py | 6 | ||||
-rw-r--r-- | Lib/xml/etree/ElementPath.py | 22 | ||||
-rw-r--r-- | Lib/xml/etree/ElementTree.py | 98 |
3 files changed, 48 insertions, 78 deletions
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 444f0b2a57..e9a1536472 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -353,14 +353,14 @@ class _AsyncDeprecatedProperty: class DocumentLS: """Mixin to create documents that conform to the load/save spec.""" - async = _AsyncDeprecatedProperty() async_ = False + locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning def _get_async(self): return False - def _set_async(self, async): - if async: + def _set_async(self, flag): + if flag: raise xml.dom.NotSupportedErr( "asynchronous document loading is not supported") diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index 5de42324c2..ab6b79a777 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -59,15 +59,15 @@ import re xpath_tokenizer_re = re.compile( - "(" - "'[^']*'|\"[^\"]*\"|" - "::|" - "//?|" - "\.\.|" - "\(\)|" - "[/.*:\[\]\(\)@=])|" - "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" - "\s+" + r"(" + r"'[^']*'|\"[^\"]*\"|" + r"::|" + r"//?|" + r"\.\.|" + r"\(\)|" + r"[/.*:\[\]\(\)@=])|" + r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" + r"\s+" ) def xpath_tokenizer(pattern, namespaces=None): @@ -180,7 +180,7 @@ def prepare_predicate(next, token): if elem.get(key) == value: yield elem return select - if signature == "-" and not re.match("\-?\d+$", predicate[0]): + if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): # [tag] tag = predicate[0] def select(context, result): @@ -188,7 +188,7 @@ def prepare_predicate(next, token): if elem.find(tag) is not None: yield elem return select - if signature == "-='" and not re.match("\-?\d+$", predicate[0]): + if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]): # [tag='value'] tag = predicate[0] value = predicate[-1] diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 92821c5706..735405681f 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -85,7 +85,7 @@ __all__ = [ "TreeBuilder", "VERSION", "XML", "XMLID", - "XMLParser", + "XMLParser", "XMLPullParser", "register_namespace", ] @@ -95,6 +95,7 @@ import sys import re import warnings import io +import collections import contextlib from . import ElementPath @@ -1029,7 +1030,7 @@ def register_namespace(prefix, uri): ValueError is raised if prefix is reserved or is invalid. """ - if re.match("ns\d+$", prefix): + if re.match(r"ns\d+$", prefix): raise ValueError("Prefix format reserved for internal use") for k, v in list(_namespace_map.items()): if k == uri or v == prefix: @@ -1084,7 +1085,7 @@ def _escape_attrib(text): if "\"" in text: text = text.replace("\"", """) # The following business with carriage returns is to satisfy - # Section 2.11 of the XML specification, stating that + # Section 2.11 of the XML specification, stating that # CR or CR LN should be replaced with just LN # http://www.w3.org/TR/REC-xml/#sec-line-ends if "\r\n" in text: @@ -1211,16 +1212,37 @@ def iterparse(source, events=None, parser=None): Returns an iterator providing (event, elem) pairs. """ + # Use the internal, undocumented _parser argument for now; When the + # parser argument of iterparse is removed, this can be killed. + pullparser = XMLPullParser(events=events, _parser=parser) + def iterator(): + try: + while True: + yield from pullparser.read_events() + # load event buffer + data = source.read(16 * 1024) + if not data: + break + pullparser.feed(data) + root = pullparser._close_and_return_root() + yield from pullparser.read_events() + it.root = root + finally: + if close_source: + source.close() + + class IterParseIterator(collections.Iterator): + __next__ = iterator().__next__ + it = IterParseIterator() + it.root = None + del iterator, IterParseIterator + close_source = False if not hasattr(source, "read"): source = open(source, "rb") close_source = True - try: - return _IterParseIterator(source, events, parser, close_source) - except: - if close_source: - source.close() - raise + + return it class XMLPullParser: @@ -1230,9 +1252,7 @@ class XMLPullParser: # upon in user code. It will be removed in a future release. # See http://bugs.python.org/issue17741 for more details. - # _elementtree.c expects a list, not a deque - self._events_queue = [] - self._index = 0 + self._events_queue = collections.deque() self._parser = _parser or XMLParser(target=TreeBuilder()) # wire up the parser for event reporting if events is None: @@ -1270,64 +1290,14 @@ class XMLPullParser: retrieved from the iterator. """ events = self._events_queue - while True: - index = self._index - try: - event = events[self._index] - # Avoid retaining references to past events - events[self._index] = None - except IndexError: - break - index += 1 - # Compact the list in a O(1) amortized fashion - # As noted above, _elementree.c needs a list, not a deque - if index * 2 >= len(events): - events[:index] = [] - self._index = 0 - else: - self._index = index + while events: + event = events.popleft() if isinstance(event, Exception): raise event else: yield event -class _IterParseIterator: - - def __init__(self, source, events, parser, close_source=False): - # Use the internal, undocumented _parser argument for now; When the - # parser argument of iterparse is removed, this can be killed. - self._parser = XMLPullParser(events=events, _parser=parser) - self._file = source - self._close_file = close_source - self.root = self._root = None - - def __next__(self): - try: - while 1: - for event in self._parser.read_events(): - return event - if self._parser._parser is None: - break - # load event buffer - data = self._file.read(16 * 1024) - if data: - self._parser.feed(data) - else: - self._root = self._parser._close_and_return_root() - self.root = self._root - except: - if self._close_file: - self._file.close() - raise - if self._close_file: - self._file.close() - raise StopIteration - - def __iter__(self): - return self - - def XML(text, parser=None): """Parse XML document from string constant. |