summary | refs | log | tree | commit | diff
path: root/Lib/xml
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/xml')
-rw-r--r-- Lib/xml/dom/__init__.py | 1
-rw-r--r-- Lib/xml/dom/domreg.py | 2
-rw-r--r-- Lib/xml/dom/expatbuilder.py | 42
-rw-r--r-- Lib/xml/dom/minidom.py | 271
-rw-r--r-- Lib/xml/dom/pulldom.py | 6
-rw-r--r-- Lib/xml/etree/ElementTree.py | 185
-rw-r--r-- Lib/xml/etree/cElementTree.py | 4
-rw-r--r-- Lib/xml/parsers/expat.py | 2
8 files changed, 296 insertions, 217 deletions
diff --git a/Lib/xml/dom/__init__.py b/Lib/xml/dom/__init__.py
index 4401bdfcf3..97cf9a6429 100644
--- a/Lib/xml/dom/__init__.py
+++ b/Lib/xml/dom/__init__.py
@@ -17,6 +17,7 @@ pulldom -- DOM builder supporting on-demand tree-building for selected
class Node:
"""Class giving the NodeType constants."""
+ __slots__ = ()
# DOM implementations may use this as a base class for their own
# Node implementations. If they don't, the constants defined here
diff --git a/Lib/xml/dom/domreg.py b/Lib/xml/dom/domreg.py
index cb35bb0596..8c3d901acb 100644
--- a/Lib/xml/dom/domreg.py
+++ b/Lib/xml/dom/domreg.py
@@ -2,8 +2,6 @@
directly. Instead, the functions getDOMImplementation and
registerDOMImplementation should be imported from xml.dom."""
-from xml.dom.minicompat import * # isinstance, StringTypes
-
# This is a list of well-known implementations. Well-known names
# should be published by posting to xml-sig@python.org, and are
# subsequently recorded in this file.
diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py
index a98fe03235..f074ab932f 100644
--- a/Lib/xml/dom/expatbuilder.py
+++ b/Lib/xml/dom/expatbuilder.py
@@ -33,8 +33,6 @@ from xml.parsers import expat
from xml.dom.minidom import _append_child, _set_attribute_node
from xml.dom.NodeFilter import NodeFilter
-from xml.dom.minicompat import *
-
TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE
@@ -283,27 +281,23 @@ class ExpatBuilder:
elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
value = node.data + data
- d = node.__dict__
- d['data'] = d['nodeValue'] = value
+ node.data = value
return
else:
node = minidom.Text()
- d = node.__dict__
- d['data'] = d['nodeValue'] = data
- d['ownerDocument'] = self.document
+ node.data = data
+ node.ownerDocument = self.document
_append_child(self.curNode, node)
def character_data_handler(self, data):
childNodes = self.curNode.childNodes
if childNodes and childNodes[-1].nodeType == TEXT_NODE:
node = childNodes[-1]
- d = node.__dict__
- d['data'] = d['nodeValue'] = node.data + data
+ node.data = node.data + data
return
node = minidom.Text()
- d = node.__dict__
- d['data'] = d['nodeValue'] = node.data + data
- d['ownerDocument'] = self.document
+ node.data = node.data + data
+ node.ownerDocument = self.document
_append_child(self.curNode, node)
def entity_decl_handler(self, entityName, is_parameter_entity, value,
@@ -363,11 +357,8 @@ class ExpatBuilder:
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
value = attributes[i+1]
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = value
- d = a.__dict__
- d['value'] = d['nodeValue'] = value
- d['ownerDocument'] = self.document
+ a.value = value
+ a.ownerDocument = self.document
_set_attribute_node(node, a)
if node is not self.document.documentElement:
@@ -761,15 +752,13 @@ class Namespaces:
else:
a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
"xmlns", EMPTY_PREFIX)
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = uri
- d = a.__dict__
- d['value'] = d['nodeValue'] = uri
- d['ownerDocument'] = self.document
+ a.value = uri
+ a.ownerDocument = self.document
_set_attribute_node(node, a)
del self._ns_ordered_prefixes[:]
if attributes:
+ node._ensure_attributes()
_attrs = node._attrs
_attrsNS = node._attrsNS
for i in range(0, len(attributes), 2):
@@ -785,12 +774,9 @@ class Namespaces:
aname, EMPTY_PREFIX)
_attrs[aname] = a
_attrsNS[(EMPTY_NAMESPACE, aname)] = a
- d = a.childNodes[0].__dict__
- d['data'] = d['nodeValue'] = value
- d = a.__dict__
- d['ownerDocument'] = self.document
- d['value'] = d['nodeValue'] = value
- d['ownerElement'] = node
+ a.ownerDocument = self.document
+ a.value = value
+ a.ownerElement = node
if __debug__:
# This only adds some asserts to the original
diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py
index f23ad05333..275e20c420 100644
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -62,10 +62,7 @@ class Node(xml.dom.Node):
return writer.stream.getvalue()
def hasChildNodes(self):
- if self.childNodes:
- return True
- else:
- return False
+ return bool(self.childNodes)
def _get_childNodes(self):
return self.childNodes
@@ -286,10 +283,10 @@ def _append_child(self, node):
childNodes = self.childNodes
if childNodes:
last = childNodes[-1]
- node.__dict__["previousSibling"] = last
- last.__dict__["nextSibling"] = node
+ node.previousSibling = last
+ last.nextSibling = node
childNodes.append(node)
- node.__dict__["parentNode"] = self
+ node.parentNode = self
def _in_document(node):
# return True iff node is part of a document tree
@@ -342,9 +339,10 @@ class DocumentFragment(Node):
class Attr(Node):
+ __slots__=('_name', '_value', 'namespaceURI',
+ '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
nodeType = Node.ATTRIBUTE_NODE
attributes = None
- ownerElement = None
specified = False
_is_id = False
@@ -352,12 +350,11 @@ class Attr(Node):
def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
prefix=None):
- # skip setattr for performance
- d = self.__dict__
- d["nodeName"] = d["name"] = qName
- d["namespaceURI"] = namespaceURI
- d["prefix"] = prefix
- d['childNodes'] = NodeList()
+ self.ownerElement = None
+ self._name = qName
+ self.namespaceURI = namespaceURI
+ self._prefix = prefix
+ self.childNodes = NodeList()
# Add the single child node that represents the value of the attr
self.childNodes.append(Text())
@@ -365,9 +362,10 @@ class Attr(Node):
# nodeValue and value are set elsewhere
def _get_localName(self):
- if 'localName' in self.__dict__:
- return self.__dict__['localName']
- return self.nodeName.split(":", 1)[-1]
+ try:
+ return self._localName
+ except AttributeError:
+ return self.nodeName.split(":", 1)[-1]
def _get_name(self):
return self.name
@@ -375,20 +373,30 @@ class Attr(Node):
def _get_specified(self):
return self.specified
- def __setattr__(self, name, value):
- d = self.__dict__
- if name in ("value", "nodeValue"):
- d["value"] = d["nodeValue"] = value
- d2 = self.childNodes[0].__dict__
- d2["data"] = d2["nodeValue"] = value
- if self.ownerElement is not None:
- _clear_id_cache(self.ownerElement)
- elif name in ("name", "nodeName"):
- d["name"] = d["nodeName"] = value
- if self.ownerElement is not None:
- _clear_id_cache(self.ownerElement)
- else:
- d[name] = value
+ def _get_name(self):
+ return self._name
+
+ def _set_name(self, value):
+ self._name = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
+
+ nodeName = name = property(_get_name, _set_name)
+
+ def _get_value(self):
+ return self._value
+
+ def _set_value(self, value):
+ self._value = value
+ self.childNodes[0].data = value
+ if self.ownerElement is not None:
+ _clear_id_cache(self.ownerElement)
+ self.childNodes[0].data = value
+
+ nodeValue = value = property(_get_value, _set_value)
+
+ def _get_prefix(self):
+ return self._prefix
def _set_prefix(self, prefix):
nsuri = self.namespaceURI
@@ -396,22 +404,16 @@ class Attr(Node):
if nsuri and nsuri != XMLNS_NAMESPACE:
raise xml.dom.NamespaceErr(
"illegal use of 'xmlns' prefix for the wrong namespace")
- d = self.__dict__
- d['prefix'] = prefix
+ self._prefix = prefix
if prefix is None:
newName = self.localName
else:
newName = "%s:%s" % (prefix, self.localName)
if self.ownerElement:
_clear_id_cache(self.ownerElement)
- d['nodeName'] = d['name'] = newName
+ self.name = newName
- def _set_value(self, value):
- d = self.__dict__
- d['value'] = d['nodeValue'] = value
- if self.ownerElement:
- _clear_id_cache(self.ownerElement)
- self.childNodes[0].data = value
+ prefix = property(_get_prefix, _set_prefix)
def unlink(self):
# This implementation does not call the base implementation
@@ -586,8 +588,8 @@ class NamedNodeMap(object):
_clear_id_cache(self._ownerElement)
del self._attrs[n.nodeName]
del self._attrsNS[(n.namespaceURI, n.localName)]
- if 'ownerElement' in n.__dict__:
- n.__dict__['ownerElement'] = None
+ if hasattr(n, 'ownerElement'):
+ n.ownerElement = None
return n
else:
raise xml.dom.NotFoundErr()
@@ -598,8 +600,8 @@ class NamedNodeMap(object):
_clear_id_cache(self._ownerElement)
del self._attrsNS[(n.namespaceURI, n.localName)]
del self._attrs[n.nodeName]
- if 'ownerElement' in n.__dict__:
- n.__dict__['ownerElement'] = None
+ if hasattr(n, 'ownerElement'):
+ n.ownerElement = None
return n
else:
raise xml.dom.NotFoundErr()
@@ -659,6 +661,9 @@ class TypeInfo(object):
_no_type = TypeInfo(None, None)
class Element(Node):
+ __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
+ 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
+ 'nextSibling', 'previousSibling')
nodeType = Node.ELEMENT_NODE
nodeValue = None
schemaType = _no_type
@@ -674,41 +679,57 @@ class Element(Node):
def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
localName=None):
+ self.parentNode = None
self.tagName = self.nodeName = tagName
self.prefix = prefix
self.namespaceURI = namespaceURI
self.childNodes = NodeList()
+ self.nextSibling = self.previousSibling = None
+
+ # Attribute dictionaries are lazily created
+ # attributes are double-indexed:
+ # tagName -> Attribute
+ # URI,localName -> Attribute
+ # in the future: consider lazy generation
+ # of attribute objects this is too tricky
+ # for now because of headaches with
+ # namespaces.
+ self._attrs = None
+ self._attrsNS = None
- self._attrs = {} # attributes are double-indexed:
- self._attrsNS = {} # tagName -> Attribute
- # URI,localName -> Attribute
- # in the future: consider lazy generation
- # of attribute objects this is too tricky
- # for now because of headaches with
- # namespaces.
+ def _ensure_attributes(self):
+ if self._attrs is None:
+ self._attrs = {}
+ self._attrsNS = {}
def _get_localName(self):
- if 'localName' in self.__dict__:
- return self.__dict__['localName']
- return self.tagName.split(":", 1)[-1]
+ try:
+ return self._localName
+ except AttributeError:
+ return self.tagName.split(":", 1)[-1]
def _get_tagName(self):
return self.tagName
def unlink(self):
- for attr in list(self._attrs.values()):
- attr.unlink()
+ if self._attrs is not None:
+ for attr in list(self._attrs.values()):
+ attr.unlink()
self._attrs = None
self._attrsNS = None
Node.unlink(self)
def getAttribute(self, attname):
+ if self._attrs is None:
+ return ""
try:
return self._attrs[attname].value
except KeyError:
return ""
def getAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return ""
try:
return self._attrsNS[(namespaceURI, localName)].value
except KeyError:
@@ -718,14 +739,11 @@ class Element(Node):
attr = self.getAttributeNode(attname)
if attr is None:
attr = Attr(attname)
- # for performance
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
+ attr.value = value # also sets nodeValue
+ attr.ownerDocument = self.ownerDocument
self.setAttributeNode(attr)
elif value != attr.value:
- d = attr.__dict__
- d["value"] = d["nodeValue"] = value
+ attr.value = value
if attr.isId:
_clear_id_cache(self)
@@ -733,33 +751,33 @@ class Element(Node):
prefix, localname = _nssplit(qualifiedName)
attr = self.getAttributeNodeNS(namespaceURI, localname)
if attr is None:
- # for performance
attr = Attr(qualifiedName, namespaceURI, localname, prefix)
- d = attr.__dict__
- d["prefix"] = prefix
- d["nodeName"] = qualifiedName
- d["value"] = d["nodeValue"] = value
- d["ownerDocument"] = self.ownerDocument
+ attr.value = value
+ attr.ownerDocument = self.ownerDocument
self.setAttributeNode(attr)
else:
- d = attr.__dict__
if value != attr.value:
- d["value"] = d["nodeValue"] = value
+ attr.value = value
if attr.isId:
_clear_id_cache(self)
if attr.prefix != prefix:
- d["prefix"] = prefix
- d["nodeName"] = qualifiedName
+ attr.prefix = prefix
+ attr.nodeName = qualifiedName
def getAttributeNode(self, attrname):
+ if self._attrs is None:
+ return None
return self._attrs.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return None
return self._attrsNS.get((namespaceURI, localName))
def setAttributeNode(self, attr):
if attr.ownerElement not in (None, self):
raise xml.dom.InuseAttributeErr("attribute node already owned")
+ self._ensure_attributes()
old1 = self._attrs.get(attr.name, None)
if old1 is not None:
self.removeAttributeNode(old1)
@@ -778,6 +796,8 @@ class Element(Node):
setAttributeNodeNS = setAttributeNode
def removeAttribute(self, name):
+ if self._attrsNS is None:
+ raise xml.dom.NotFoundErr()
try:
attr = self._attrs[name]
except KeyError:
@@ -785,6 +805,8 @@ class Element(Node):
self.removeAttributeNode(attr)
def removeAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ raise xml.dom.NotFoundErr()
try:
attr = self._attrsNS[(namespaceURI, localName)]
except KeyError:
@@ -807,9 +829,13 @@ class Element(Node):
removeAttributeNodeNS = removeAttributeNode
def hasAttribute(self, name):
+ if self._attrs is None:
+ return False
return name in self._attrs
def hasAttributeNS(self, namespaceURI, localName):
+ if self._attrsNS is None:
+ return False
return (namespaceURI, localName) in self._attrsNS
def getElementsByTagName(self, name):
@@ -850,6 +876,7 @@ class Element(Node):
writer.write("/>%s"%(newl))
def _get_attributes(self):
+ self._ensure_attributes()
return NamedNodeMap(self._attrs, self._attrsNS, self)
def hasAttributes(self):
@@ -874,7 +901,7 @@ class Element(Node):
if _get_containing_entref(self) is not None:
raise xml.dom.NoModificationAllowedErr()
if not idAttr._is_id:
- idAttr.__dict__['_is_id'] = True
+ idAttr._is_id = True
self._magic_id_nodes += 1
self.ownerDocument._magic_id_count += 1
_clear_id_cache(self)
@@ -887,19 +914,20 @@ defproperty(Element, "localName",
def _set_attribute_node(element, attr):
_clear_id_cache(element)
+ element._ensure_attributes()
element._attrs[attr.name] = attr
element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
# This creates a circular reference, but Element.unlink()
# breaks the cycle since the references to the attribute
# dictionaries are tossed.
- attr.__dict__['ownerElement'] = element
-
+ attr.ownerElement = element
class Childless:
"""Mixin that makes childless-ness easy to implement and avoids
the complexity of the Node methods that deal with children.
"""
+ __slots__ = ()
attributes = None
childNodes = EmptyNodeList()
@@ -938,54 +966,49 @@ class Childless:
class ProcessingInstruction(Childless, Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE
+ __slots__ = ('target', 'data')
def __init__(self, target, data):
- self.target = self.nodeName = target
- self.data = self.nodeValue = data
+ self.target = target
+ self.data = data
- def _get_data(self):
+ # nodeValue is an alias for data
+ def _get_nodeValue(self):
return self.data
- def _set_data(self, value):
- d = self.__dict__
- d['data'] = d['nodeValue'] = value
+ def _set_nodeValue(self, value):
+ self.data = data
+ nodeValue = property(_get_nodeValue, _set_nodeValue)
- def _get_target(self):
+ # nodeName is an alias for target
+ def _get_nodeName(self):
return self.target
- def _set_target(self, value):
- d = self.__dict__
- d['target'] = d['nodeName'] = value
-
- def __setattr__(self, name, value):
- if name == "data" or name == "nodeValue":
- self.__dict__['data'] = self.__dict__['nodeValue'] = value
- elif name == "target" or name == "nodeName":
- self.__dict__['target'] = self.__dict__['nodeName'] = value
- else:
- self.__dict__[name] = value
+ def _set_nodeName(self, value):
+ self.target = value
+ nodeName = property(_get_nodeName, _set_nodeName)
def writexml(self, writer, indent="", addindent="", newl=""):
writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
class CharacterData(Childless, Node):
+ __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')
+
+ def __init__(self):
+ self.ownerDocument = self.parentNode = None
+ self.previousSibling = self.nextSibling = None
+ self._data = ''
+ Node.__init__(self)
+
def _get_length(self):
return len(self.data)
__len__ = _get_length
def _get_data(self):
- return self.__dict__['data']
+ return self._data
def _set_data(self, data):
- d = self.__dict__
- d['data'] = d['nodeValue'] = data
-
- _get_nodeValue = _get_data
- _set_nodeValue = _set_data
+ self._data = data
- def __setattr__(self, name, value):
- if name == "data" or name == "nodeValue":
- self.__dict__['data'] = self.__dict__['nodeValue'] = value
- else:
- self.__dict__[name] = value
+ data = nodeValue = property(_get_data, _set_data)
def __repr__(self):
data = self.data
@@ -1042,10 +1065,7 @@ defproperty(CharacterData, "length", doc="Length of the string data.")
class Text(CharacterData):
- # Make sure we don't add an instance __dict__ if we don't already
- # have one, at least when that's possible:
- # XXX this does not work, CharacterData is an old-style class
- # __slots__ = ()
+ __slots__ = ()
nodeType = Node.TEXT_NODE
nodeName = "#text"
@@ -1112,9 +1132,7 @@ class Text(CharacterData):
else:
break
if content:
- d = self.__dict__
- d['data'] = content
- d['nodeValue'] = content
+ self.data = content
return self
else:
return None
@@ -1160,7 +1178,8 @@ class Comment(CharacterData):
nodeName = "#comment"
def __init__(self, data):
- self.data = self.nodeValue = data
+ CharacterData.__init__(self)
+ self._data = data
def writexml(self, writer, indent="", addindent="", newl=""):
if "--" in self.data:
@@ -1169,10 +1188,7 @@ class Comment(CharacterData):
class CDATASection(Text):
- # Make sure we don't add an instance __dict__ if we don't already
- # have one, at least when that's possible:
- # XXX this does not work, Text is an old-style class
- # __slots__ = ()
+ __slots__ = ()
nodeType = Node.CDATA_SECTION_NODE
nodeName = "#cdata-section"
@@ -1252,8 +1268,7 @@ defproperty(ReadOnlySequentialNamedNodeMap, "length",
class Identified:
"""Mix-in class that supports the publicId and systemId attributes."""
- # XXX this does not work, this is an old-style class
- # __slots__ = 'publicId', 'systemId'
+ __slots__ = 'publicId', 'systemId'
def _identified_mixin_init(self, publicId, systemId):
self.publicId = publicId
@@ -1504,18 +1519,19 @@ def _clear_id_cache(node):
node.ownerDocument._id_search_stack= None
class Document(Node, DocumentLS):
+ __slots__ = ('_elem_info', 'doctype',
+ '_id_search_stack', 'childNodes', '_id_cache')
_child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
+ implementation = DOMImplementation()
nodeType = Node.DOCUMENT_NODE
nodeName = "#document"
nodeValue = None
attributes = None
- doctype = None
parentNode = None
previousSibling = nextSibling = None
- implementation = DOMImplementation()
# Document attributes from Level 3 (WD 9 April 2002)
@@ -1530,6 +1546,7 @@ class Document(Node, DocumentLS):
_magic_id_count = 0
def __init__(self):
+ self.doctype = None
self.childNodes = NodeList()
# mapping of (namespaceURI, localName) -> ElementInfo
# and tagName -> ElementInfo
@@ -1815,17 +1832,15 @@ class Document(Node, DocumentLS):
element.removeAttributeNode(n)
else:
element = None
- # avoid __setattr__
- d = n.__dict__
- d['prefix'] = prefix
- d['localName'] = localName
- d['namespaceURI'] = namespaceURI
- d['nodeName'] = name
+ n.prefix = prefix
+ n._localName = localName
+ n.namespaceURI = namespaceURI
+ n.nodeName = name
if n.nodeType == Node.ELEMENT_NODE:
- d['tagName'] = name
+ n.tagName = name
else:
# attribute node
- d['name'] = name
+ n.name = name
if element is not None:
element.setAttributeNode(n)
if is_id:
diff --git a/Lib/xml/dom/pulldom.py b/Lib/xml/dom/pulldom.py
index d5ac8b2b98..43504f7656 100644
--- a/Lib/xml/dom/pulldom.py
+++ b/Lib/xml/dom/pulldom.py
@@ -1,6 +1,5 @@
import xml.sax
import xml.sax.handler
-import types
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"
@@ -334,10 +333,7 @@ def parse(stream_or_string, parser=None, bufsize=None):
return DOMEventStream(stream, parser, bufsize)
def parseString(string, parser=None):
- try:
- from io import StringIO
- except ImportError:
- from io import StringIO
+ from io import StringIO
bufsize = len(string)
buf = StringIO(string)
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index f94c48c09c..5f974f65b0 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -68,8 +68,9 @@ __all__ = [
"tostring", "tostringlist",
"TreeBuilder",
"VERSION",
- "XML",
+ "XML", "XMLID",
"XMLParser", "XMLTreeBuilder",
+ "register_namespace",
]
VERSION = "1.3.0"
@@ -100,7 +101,6 @@ import sys
import re
import warnings
-
class _SimpleElementPath:
# emulate pre-1.2 find/findtext/findall behaviour
def find(self, element, tag, namespaces=None):
@@ -148,9 +148,9 @@ class ParseError(SyntaxError):
# @defreturn flag
def iselement(element):
- # FIXME: not sure about this; might be a better idea to look
- # for tag/attrib/text attributes
- return isinstance(element, Element) or hasattr(element, "tag")
+ # FIXME: not sure about this;
+ # isinstance(element, Element) or look for tag/attrib/text attributes
+ return hasattr(element, 'tag')
##
# Element class. This class defines the Element interface, and
@@ -298,7 +298,7 @@ class Element:
# @param element The element to add.
def append(self, element):
- # assert iselement(element)
+ self._assert_is_element(element)
self._children.append(element)
##
@@ -308,8 +308,8 @@ class Element:
# @since 1.3
def extend(self, elements):
- # for element in elements:
- # assert iselement(element)
+ for element in elements:
+ self._assert_is_element(element)
self._children.extend(elements)
##
@@ -318,9 +318,13 @@ class Element:
# @param index Where to insert the new subelement.
def insert(self, index, element):
- # assert iselement(element)
+ self._assert_is_element(element)
self._children.insert(index, element)
+ def _assert_is_element(self, e):
+ if not isinstance(e, Element):
+ raise TypeError('expected an Element, not %s' % type(e).__name__)
+
##
# Removes a matching subelement. Unlike the <b>find</b> methods,
# this method compares elements based on identity, not on tag
@@ -1085,6 +1089,8 @@ _namespace_map = {
# dublin core
"http://purl.org/dc/elements/1.1/": "dc",
}
+# For tests and troubleshooting
+register_namespace._namespace_map = _namespace_map
def _raise_serialization_error(text):
raise TypeError(
@@ -1509,24 +1515,30 @@ class XMLParser:
self.target = self._target = target
self._error = expat.error
self._names = {} # name memo cache
- # callbacks
+ # main callbacks
parser.DefaultHandlerExpand = self._default
- parser.StartElementHandler = self._start
- parser.EndElementHandler = self._end
- parser.CharacterDataHandler = self._data
- # optional callbacks
- parser.CommentHandler = self._comment
- parser.ProcessingInstructionHandler = self._pi
+ if hasattr(target, 'start'):
+ parser.StartElementHandler = self._start
+ if hasattr(target, 'end'):
+ parser.EndElementHandler = self._end
+ if hasattr(target, 'data'):
+ parser.CharacterDataHandler = target.data
+ # miscellaneous callbacks
+ if hasattr(target, 'comment'):
+ parser.CommentHandler = target.comment
+ if hasattr(target, 'pi'):
+ parser.ProcessingInstructionHandler = target.pi
# let expat do the buffering, if supported
try:
- self._parser.buffer_text = 1
+ parser.buffer_text = 1
except AttributeError:
pass
# use new-style attribute handling, if supported
try:
- self._parser.ordered_attributes = 1
- self._parser.specified_attributes = 1
- parser.StartElementHandler = self._start_list
+ parser.ordered_attributes = 1
+ parser.specified_attributes = 1
+ if hasattr(target, 'start'):
+ parser.StartElementHandler = self._start_list
except AttributeError:
pass
self._doctype = None
@@ -1570,44 +1582,29 @@ class XMLParser:
attrib[fixname(attrib_in[i])] = attrib_in[i+1]
return self.target.start(tag, attrib)
- def _data(self, text):
- return self.target.data(text)
-
def _end(self, tag):
return self.target.end(self._fixname(tag))
- def _comment(self, data):
- try:
- comment = self.target.comment
- except AttributeError:
- pass
- else:
- return comment(data)
-
- def _pi(self, target, data):
- try:
- pi = self.target.pi
- except AttributeError:
- pass
- else:
- return pi(target, data)
-
def _default(self, text):
prefix = text[:1]
if prefix == "&":
# deal with undefined entities
try:
- self.target.data(self.entity[text[1:-1]])
+ data_handler = self.target.data
+ except AttributeError:
+ return
+ try:
+ data_handler(self.entity[text[1:-1]])
except KeyError:
from xml.parsers import expat
err = expat.error(
"undefined entity %s: line %d, column %d" %
- (text, self._parser.ErrorLineNumber,
- self._parser.ErrorColumnNumber)
+ (text, self.parser.ErrorLineNumber,
+ self.parser.ErrorColumnNumber)
)
err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
- err.lineno = self._parser.ErrorLineNumber
- err.offset = self._parser.ErrorColumnNumber
+ err.lineno = self.parser.ErrorLineNumber
+ err.offset = self.parser.ErrorColumnNumber
raise err
elif prefix == "<" and text[:9] == "<!DOCTYPE":
self._doctype = [] # inside a doctype declaration
@@ -1634,7 +1631,7 @@ class XMLParser:
pubid = pubid[1:-1]
if hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
- elif self.doctype is not self._XMLParser__doctype:
+ elif self.doctype != self._XMLParser__doctype:
# warn about deprecated call
self._XMLParser__doctype(name, pubid, system[1:-1])
self.doctype(name, pubid, system[1:-1])
@@ -1665,7 +1662,7 @@ class XMLParser:
def feed(self, data):
try:
- self._parser.Parse(data, 0)
+ self.parser.Parse(data, 0)
except self._error as v:
self._raiseerror(v)
@@ -1677,12 +1674,100 @@ class XMLParser:
def close(self):
try:
- self._parser.Parse("", 1) # end of data
+ self.parser.Parse("", 1) # end of data
except self._error as v:
self._raiseerror(v)
- tree = self.target.close()
- del self.target, self._parser # get rid of circular references
- return tree
+ try:
+ close_handler = self.target.close
+ except AttributeError:
+ pass
+ else:
+ return close_handler()
+ finally:
+ # get rid of circular references
+ del self.parser, self._parser
+ del self.target, self._target
+
+
+# Import the C accelerators
+try:
+ # Element, SubElement, ParseError, TreeBuilder, XMLParser
+ from _elementtree import *
+except ImportError:
+ pass
+else:
+ # Overwrite 'ElementTree.parse' and 'iterparse' to use the C XMLParser
+
+ class ElementTree(ElementTree):
+ def parse(self, source, parser=None):
+ close_source = False
+ if not hasattr(source, 'read'):
+ source = open(source, 'rb')
+ close_source = True
+ try:
+ if parser is not None:
+ while True:
+ data = source.read(65536)
+ if not data:
+ break
+ parser.feed(data)
+ self._root = parser.close()
+ else:
+ parser = XMLParser()
+ self._root = parser._parse(source)
+ return self._root
+ finally:
+ if close_source:
+ source.close()
+
+ class iterparse:
+ root = None
+ def __init__(self, file, events=None):
+ self._close_file = False
+ if not hasattr(file, 'read'):
+ file = open(file, 'rb')
+ self._close_file = True
+ self._file = file
+ self._events = []
+ self._index = 0
+ self._error = None
+ self.root = self._root = None
+ b = TreeBuilder()
+ self._parser = XMLParser(b)
+ self._parser._setevents(self._events, events)
+
+ def __next__(self):
+ while True:
+ try:
+ item = self._events[self._index]
+ self._index += 1
+ return item
+ except IndexError:
+ pass
+ if self._error:
+ e = self._error
+ self._error = None
+ raise e
+ if self._parser is None:
+ self.root = self._root
+ if self._close_file:
+ self._file.close()
+ raise StopIteration
+ # load event buffer
+ del self._events[:]
+ self._index = 0
+ data = self._file.read(16384)
+ if data:
+ try:
+ self._parser.feed(data)
+ except SyntaxError as exc:
+ self._error = exc
+ else:
+ self._root = self._parser.close()
+ self._parser = None
+
+ def __iter__(self):
+ return self
# compatibility
XMLTreeBuilder = XMLParser
diff --git a/Lib/xml/etree/cElementTree.py b/Lib/xml/etree/cElementTree.py
index a6f127abd5..368e679189 100644
--- a/Lib/xml/etree/cElementTree.py
+++ b/Lib/xml/etree/cElementTree.py
@@ -1,3 +1,3 @@
-# Wrapper module for _elementtree
+# Deprecated alias for xml.etree.ElementTree
-from _elementtree import *
+from xml.etree.ElementTree import *
diff --git a/Lib/xml/parsers/expat.py b/Lib/xml/parsers/expat.py
index a805b828d8..bcbe9fb1f8 100644
--- a/Lib/xml/parsers/expat.py
+++ b/Lib/xml/parsers/expat.py
@@ -1,6 +1,4 @@
"""Interface to the Expat non-validating XML parser."""
-__version__ = '$Revision$'
-
import sys
from pyexpat import *