summary | refs | log | tree | commit | diff
path: root/Lib/xml
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml')
-rw-r--r-- Lib/xml/dom/xmlbuilder.py 6
-rw-r--r-- Lib/xml/etree/ElementPath.py 22
-rw-r--r-- Lib/xml/etree/ElementTree.py 98
3 files changed, 48 insertions, 78 deletions
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
index 444f0b2a57..e9a1536472 100644
--- a/Lib/xml/dom/xmlbuilder.py
+++ b/Lib/xml/dom/xmlbuilder.py
@@ -353,14 +353,14 @@ class _AsyncDeprecatedProperty:
class DocumentLS:
"""Mixin to create documents that conform to the load/save spec."""
- async = _AsyncDeprecatedProperty()
async_ = False
+ locals()['async'] = _AsyncDeprecatedProperty() # Avoid DeprecationWarning
def _get_async(self):
return False
- def _set_async(self, async):
- if async:
+ def _set_async(self, flag):
+ if flag:
raise xml.dom.NotSupportedErr(
"asynchronous document loading is not supported")
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index 5de42324c2..ab6b79a777 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -59,15 +59,15 @@
import re
xpath_tokenizer_re = re.compile(
- "("
- "'[^']*'|\"[^\"]*\"|"
- "::|"
- "//?|"
- "\.\.|"
- "\(\)|"
- "[/.*:\[\]\(\)@=])|"
- "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
- "\s+"
+ r"("
+ r"'[^']*'|\"[^\"]*\"|"
+ r"::|"
+ r"//?|"
+ r"\.\.|"
+ r"\(\)|"
+ r"[/.*:\[\]\(\)@=])|"
+ r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
+ r"\s+"
)
def xpath_tokenizer(pattern, namespaces=None):
@@ -180,7 +180,7 @@ def prepare_predicate(next, token):
if elem.get(key) == value:
yield elem
return select
- if signature == "-" and not re.match("\-?\d+$", predicate[0]):
+ if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
# [tag]
tag = predicate[0]
def select(context, result):
@@ -188,7 +188,7 @@ def prepare_predicate(next, token):
if elem.find(tag) is not None:
yield elem
return select
- if signature == "-='" and not re.match("\-?\d+$", predicate[0]):
+ if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
# [tag='value']
tag = predicate[0]
value = predicate[-1]
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 92821c5706..735405681f 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -85,7 +85,7 @@ __all__ = [
"TreeBuilder",
"VERSION",
"XML", "XMLID",
- "XMLParser",
+ "XMLParser", "XMLPullParser",
"register_namespace",
]
@@ -95,6 +95,7 @@ import sys
import re
import warnings
import io
+import collections
import contextlib
from . import ElementPath
@@ -1029,7 +1030,7 @@ def register_namespace(prefix, uri):
ValueError is raised if prefix is reserved or is invalid.
"""
- if re.match("ns\d+$", prefix):
+ if re.match(r"ns\d+$", prefix):
raise ValueError("Prefix format reserved for internal use")
for k, v in list(_namespace_map.items()):
if k == uri or v == prefix:
@@ -1084,7 +1085,7 @@ def _escape_attrib(text):
if "\"" in text:
text = text.replace("\"", "&quot;")
# The following business with carriage returns is to satisfy
- # Section 2.11 of the XML specification, stating that
+ # Section 2.11 of the XML specification, stating that
# CR or CR LN should be replaced with just LN
# http://www.w3.org/TR/REC-xml/#sec-line-ends
if "\r\n" in text:
@@ -1211,16 +1212,37 @@ def iterparse(source, events=None, parser=None):
Returns an iterator providing (event, elem) pairs.
"""
+ # Use the internal, undocumented _parser argument for now; When the
+ # parser argument of iterparse is removed, this can be killed.
+ pullparser = XMLPullParser(events=events, _parser=parser)
+ def iterator():
+ try:
+ while True:
+ yield from pullparser.read_events()
+ # load event buffer
+ data = source.read(16 * 1024)
+ if not data:
+ break
+ pullparser.feed(data)
+ root = pullparser._close_and_return_root()
+ yield from pullparser.read_events()
+ it.root = root
+ finally:
+ if close_source:
+ source.close()
+
+ class IterParseIterator(collections.Iterator):
+ __next__ = iterator().__next__
+ it = IterParseIterator()
+ it.root = None
+ del iterator, IterParseIterator
+
close_source = False
if not hasattr(source, "read"):
source = open(source, "rb")
close_source = True
- try:
- return _IterParseIterator(source, events, parser, close_source)
- except:
- if close_source:
- source.close()
- raise
+
+ return it
class XMLPullParser:
@@ -1230,9 +1252,7 @@ class XMLPullParser:
# upon in user code. It will be removed in a future release.
# See http://bugs.python.org/issue17741 for more details.
- # _elementtree.c expects a list, not a deque
- self._events_queue = []
- self._index = 0
+ self._events_queue = collections.deque()
self._parser = _parser or XMLParser(target=TreeBuilder())
# wire up the parser for event reporting
if events is None:
@@ -1270,64 +1290,14 @@ class XMLPullParser:
retrieved from the iterator.
"""
events = self._events_queue
- while True:
- index = self._index
- try:
- event = events[self._index]
- # Avoid retaining references to past events
- events[self._index] = None
- except IndexError:
- break
- index += 1
- # Compact the list in a O(1) amortized fashion
- # As noted above, _elementree.c needs a list, not a deque
- if index * 2 >= len(events):
- events[:index] = []
- self._index = 0
- else:
- self._index = index
+ while events:
+ event = events.popleft()
if isinstance(event, Exception):
raise event
else:
yield event
-class _IterParseIterator:
-
- def __init__(self, source, events, parser, close_source=False):
- # Use the internal, undocumented _parser argument for now; When the
- # parser argument of iterparse is removed, this can be killed.
- self._parser = XMLPullParser(events=events, _parser=parser)
- self._file = source
- self._close_file = close_source
- self.root = self._root = None
-
- def __next__(self):
- try:
- while 1:
- for event in self._parser.read_events():
- return event
- if self._parser._parser is None:
- break
- # load event buffer
- data = self._file.read(16 * 1024)
- if data:
- self._parser.feed(data)
- else:
- self._root = self._parser._close_and_return_root()
- self.root = self._root
- except:
- if self._close_file:
- self._file.close()
- raise
- if self._close_file:
- self._file.close()
- raise StopIteration
-
- def __iter__(self):
- return self
-
-
def XML(text, parser=None):
"""Parse XML document from string constant.