summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.txt3
-rw-r--r--src/lxml/apihelpers.pxi41
-rw-r--r--src/lxml/docloader.pxi7
-rw-r--r--src/lxml/dtd.pxi5
-rw-r--r--src/lxml/extensions.pxi6
-rw-r--r--src/lxml/iterparse.pxi10
-rw-r--r--src/lxml/lxml.etree.pyx128
-rw-r--r--src/lxml/parser.pxi31
-rw-r--r--src/lxml/readonlytree.pxi4
-rw-r--r--src/lxml/relaxng.pxi5
-rw-r--r--src/lxml/saxparser.pxi2
-rw-r--r--src/lxml/schematron.pxi7
-rw-r--r--src/lxml/serializer.pxi5
-rw-r--r--src/lxml/xinclude.pxi3
-rw-r--r--src/lxml/xmlerror.pxi2
-rw-r--r--src/lxml/xmlschema.pxi14
-rw-r--r--src/lxml/xpath.pxi22
-rw-r--r--src/lxml/xslt.pxi35
-rw-r--r--src/lxml/xsltext.pxi2
19 files changed, 245 insertions, 87 deletions
diff --git a/CHANGES.txt b/CHANGES.txt
index 239312ae..520db93e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -77,6 +77,9 @@ Features added
Bugs fixed
----------
+* API is hardened against invalid proxy instances to prevent crashes
+ due to incorrectly instantiated Element instances.
+
* Prevent crash when instantiating ``CommentBase`` and friends.
* Export ElementTree compatible XML parser class as
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
index 2e600b44..6ee1cd92 100644
--- a/src/lxml/apihelpers.pxi
+++ b/src/lxml/apihelpers.pxi
@@ -11,6 +11,12 @@ cdef void displayNode(xmlNode* c_node, indent):
displayNode(c_child, indent + 1)
c_child = c_child.next
+cdef inline int _assertValidNode(_Element element) except -1:
+ assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
+
+cdef inline int _assertValidDoc(_Document doc) except -1:
+ assert doc._c_doc is not NULL, u"invalid Document proxy at %s" % id(doc)
+
cdef _Document _documentOrRaise(object input):
u"""Call this to get the document of a _Document, _ElementTree or _Element
object, or to raise an exception if it can't be determined.
@@ -33,8 +39,8 @@ cdef _Document _documentOrRaise(object input):
if doc is None:
raise ValueError, u"Input object has no document: %s" % \
python._fqtypename(input)
- else:
- return doc
+ _assertValidDoc(doc)
+ return doc
cdef _Element _rootNodeOrRaise(object input):
u"""Call this to get the root node of a _Document, _ElementTree or
@@ -55,36 +61,41 @@ cdef _Element _rootNodeOrRaise(object input):
if node is None:
raise ValueError, u"Input object has no element: %s" % \
python._fqtypename(input)
- else:
- return node
+ _assertValidNode(node)
+ return node
cdef _Document _documentOf(object input):
# call this to get the document of a
# _Document, _ElementTree or _Element object
# may return None!
cdef _Element element
+ cdef _Document doc = None
if isinstance(input, _ElementTree):
element = (<_ElementTree>input)._context_node
if element is not None:
- return element._doc
+ doc = element._doc
elif isinstance(input, _Element):
- return (<_Element>input)._doc
+ doc = (<_Element>input)._doc
elif isinstance(input, _Document):
- return <_Document>input
- return None
+ doc = <_Document>input
+ if doc is not None:
+ _assertValidDoc(doc)
+ return doc
cdef _Element _rootNodeOf(object input):
# call this to get the root node of a
# _Document, _ElementTree or _Element object
# may return None!
+ cdef _Element element = None
if isinstance(input, _ElementTree):
- return (<_ElementTree>input)._context_node
+ element = (<_ElementTree>input)._context_node
elif isinstance(input, _Element):
- return <_Element>input
+ element = <_Element>input
elif isinstance(input, _Document):
- return (<_Document>input).getroot()
- else:
- return None
+ element = (<_Document>input).getroot()
+ if element is not None:
+ _assertValidNode(element)
+ return element
cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc,
_BaseParser parser, text, tail, attrib, nsmap,
@@ -183,6 +194,7 @@ cdef _Element _makeSubElement(_Element parent, tag, text, tail,
cdef xmlDoc* c_doc
if parent is None or parent._doc is None:
return None
+ _assertValidNode(parent)
ns_utf, name_utf = _getNsTag(tag)
c_doc = parent._doc._c_doc
@@ -1181,6 +1193,7 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node,
if c_node is not NULL:
for element in elements:
assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
# move element and tail over
c_source_doc = element._c_node.doc
c_next = element._c_node.next
@@ -1205,10 +1218,12 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node,
if left_to_right:
for element in elements:
assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
_appendChild(parent, element)
else:
for element in elements:
assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
_prependChild(parent, element)
return 0
diff --git a/src/lxml/docloader.pxi b/src/lxml/docloader.pxi
index dd95e1e8..42409a53 100644
--- a/src/lxml/docloader.pxi
+++ b/src/lxml/docloader.pxi
@@ -1,6 +1,7 @@
# Custom resolver API
ctypedef enum _InputDocumentDataType:
+ PARSER_DATA_INVALID
PARSER_DATA_EMPTY
PARSER_DATA_STRING
PARSER_DATA_FILENAME
@@ -12,6 +13,10 @@ cdef class _InputDocument:
cdef object _filename
cdef object _file
+ def __cinit__(self):
+ self._type = PARSER_DATA_INVALID
+
+
cdef class Resolver:
u"This is the base class of all resolvers."
def resolve(self, system_url, public_id, context):
@@ -101,7 +106,7 @@ cdef class Resolver:
cdef class _ResolverRegistry:
cdef object _resolvers
cdef Resolver _default_resolver
- def __init__(self, Resolver default_resolver=None):
+ def __cinit__(self, Resolver default_resolver=None):
self._resolvers = set()
self._default_resolver = default_resolver
diff --git a/src/lxml/dtd.pxi b/src/lxml/dtd.pxi
index 9c617d65..bd081bad 100644
--- a/src/lxml/dtd.pxi
+++ b/src/lxml/dtd.pxi
@@ -28,8 +28,10 @@ cdef class DTD(_Validator):
catalog.
"""
cdef tree.xmlDtd* _c_dtd
- def __init__(self, file=None, *, external_id=None):
+ def __cinit__(self):
self._c_dtd = NULL
+
+ def __init__(self, file=None, *, external_id=None):
_Validator.__init__(self)
if file is not None:
if _isString(file):
@@ -69,6 +71,7 @@ cdef class DTD(_Validator):
cdef dtdvalid.xmlValidCtxt* valid_ctxt
cdef int ret
+ assert self._c_dtd is not NULL, "DTD not initialised"
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
diff --git a/src/lxml/extensions.pxi b/src/lxml/extensions.pxi
index 120b661b..6712d151 100644
--- a/src/lxml/extensions.pxi
+++ b/src/lxml/extensions.pxi
@@ -42,6 +42,8 @@ cdef class _BaseContext:
cdef _TempStore _temp_refs
cdef set _temp_documents
cdef _ExceptionContext _exc
+ def __cinit__(self):
+ self._xpathCtxt = NULL
def __init__(self, namespaces, extensions, enable_regexp,
build_smart_strings):
@@ -340,7 +342,7 @@ cdef class _BaseContext:
"""
cdef _Document doc
for doc in self._temp_documents:
- if doc._c_doc is c_node.doc:
+ if doc is not None and doc._c_doc is c_node.doc:
return doc
return None
@@ -374,7 +376,7 @@ def Extension(module, function_mapping=None, *, ns=None):
cdef class _ExsltRegExp:
cdef dict _compile_map
- def __init__(self):
+ def __cinit__(self):
self._compile_map = {}
cdef _make_string(self, value):
diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi
index 74f45e17..71c7bf9e 100644
--- a/src/lxml/iterparse.pxi
+++ b/src/lxml/iterparse.pxi
@@ -79,7 +79,7 @@ cdef class _IterparseContext(_ParserContext):
cdef char* _tag_href
cdef char* _tag_name
- def __init__(self):
+ def __cinit__(self):
self._ns_stack = []
self._pop_ns = self._ns_stack.pop
self._node_stack = []
@@ -581,7 +581,7 @@ cdef class iterwalk:
cdef _Element node
cdef _Element next_node
cdef int ns_count
- if python.PyList_GET_SIZE(self._events):
+ if self._events:
return self._pop_event(0)
ns_count = 0
# find next node
@@ -597,7 +597,7 @@ cdef class iterwalk:
next_node = None
while next_node is None:
# back off through parents
- self._index = self._index - 1
+ self._index -= 1
node = self._end_node()
if self._index < 0:
break
@@ -609,8 +609,8 @@ cdef class iterwalk:
elif self._event_filter & ITERPARSE_FILTER_END_NS:
ns_count = _countNsDefs(next_node._c_node)
self._node_stack.append( (next_node, ns_count) )
- self._index = self._index + 1
- if python.PyList_GET_SIZE(self._events):
+ self._index += 1
+ if self._events:
return self._pop_event(0)
raise StopIteration
diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx
index 0904f7a2..58a77ee9 100644
--- a/src/lxml/lxml.etree.pyx
+++ b/src/lxml/lxml.etree.pyx
@@ -171,7 +171,7 @@ class LxmlError(Error):
def __init__(self, message, error_log=None):
if python.PY_VERSION_HEX >= 0x02050000:
# Python >= 2.5 uses new style class exceptions
- super(_LxmlError, self).__init__(message)
+ super(_Error, self).__init__(message)
else:
error_super_init(self, message)
if error_log is None:
@@ -179,7 +179,7 @@ class LxmlError(Error):
else:
self.error_log = error_log.copy()
-cdef object _LxmlError = LxmlError
+cdef object _Error = Error
cdef object error_super_init = Error.__init__
@@ -326,7 +326,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
cdef bint hasdoctype(self):
# DOCTYPE gets parsed into internal subset (xmlDTD*)
- return self._c_doc.intSubset is not NULL
+ return self._c_doc is not NULL and self._c_doc.intSubset is not NULL
cdef getdoctype(self):
# get doctype info: root tag, public/system ID (or None if not known)
@@ -355,8 +355,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
cdef getxmlinfo(self):
# return XML version and encoding (or None if not known)
- cdef xmlDoc* c_doc
- c_doc = self._c_doc
+ cdef xmlDoc* c_doc = self._c_doc
if c_doc.version is NULL:
version = None
else:
@@ -377,8 +376,8 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
cdef buildNewPrefix(self):
# get a new unique prefix ("nsX") for this document
- if self._ns_counter < python.PyTuple_GET_SIZE(_PREFIX_CACHE):
- ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter)
+ if self._ns_counter < len(_PREFIX_CACHE):
+ ns = _PREFIX_CACHE[self._ns_counter]
python.Py_INCREF(ns)
else:
ns = python.PyBytes_FromFormat("ns%d", self._ns_counter)
@@ -444,12 +443,12 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
c_ns = self._findOrBuildNodeNs(c_node, href, NULL, 0)
tree.xmlSetNs(c_node, c_ns)
-cdef __initPrefixCache():
+cdef tuple __initPrefixCache():
cdef int i
return tuple([ python.PyBytes_FromFormat("ns%d", i)
for i in range(30) ])
-cdef object _PREFIX_CACHE
+cdef tuple _PREFIX_CACHE
_PREFIX_CACHE = __initPrefixCache()
cdef extern from "etree_defs.h":
@@ -607,6 +606,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef _Element element
cdef bint left_to_right
cdef Py_ssize_t slicelength, step
+ _assertValidNode(self)
if value is None:
raise ValueError, u"cannot assign None"
if python.PySlice_Check(x):
@@ -622,6 +622,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
else:
# otherwise: normal item assignment
element = value
+ _assertValidNode(element)
c_node = _findChild(self._c_node, x)
if c_node is NULL:
raise IndexError, u"list index out of range"
@@ -642,6 +643,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlNode* c_node
cdef xmlNode* c_next
cdef Py_ssize_t step, slicelength
+ _assertValidNode(self)
if python.PySlice_Check(x):
# slice deletion
if _isFullSlice(<python.slice>x):
@@ -673,6 +675,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlDoc* c_doc
cdef xmlNode* c_node
cdef _Document new_doc
+ _assertValidNode(self)
c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive
new_doc = _documentFactory(c_doc, self._doc._parser)
root = new_doc.getroot()
@@ -691,6 +694,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Sets an element attribute.
"""
+ _assertValidNode(self)
_setAttributeValue(self, key, value)
def append(self, _Element element not None):
@@ -698,9 +702,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Adds a subelement to the end of this element.
"""
+ _assertValidNode(self)
+ _assertValidNode(element)
_appendChild(self, element)
- def addnext(self, _Element element):
+ def addnext(self, _Element element not None):
u"""addnext(self, element)
Adds the element as a following sibling directly after this
@@ -710,6 +716,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
the root node of a document. Note that tail text is automatically
discarded when adding at the root level.
"""
+ _assertValidNode(self)
+ _assertValidNode(element)
if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
if element._c_node.type != tree.XML_PI_NODE:
if element._c_node.type != tree.XML_COMMENT_NODE:
@@ -717,7 +725,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
element.tail = None
_appendSibling(self, element)
- def addprevious(self, _Element element):
+ def addprevious(self, _Element element not None):
u"""addprevious(self, element)
Adds the element as a preceding sibling directly before this
@@ -727,6 +735,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
before the root node of a document. Note that tail text is
automatically discarded when adding at the root level.
"""
+ _assertValidNode(self)
+ _assertValidNode(element)
if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
if element._c_node.type != tree.XML_PI_NODE:
if element._c_node.type != tree.XML_COMMENT_NODE:
@@ -739,7 +749,10 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Extends the current children by the elements in the iterable.
"""
+ _assertValidNode(self)
for element in elements:
+ assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
_appendChild(self, element)
def clear(self):
@@ -752,6 +765,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlAttr* c_attr_next
cdef xmlNode* c_node
cdef xmlNode* c_node_next
+ _assertValidNode(self)
c_node = self._c_node
# remove self.text and self.tail
_removeText(c_node.children)
@@ -780,6 +794,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlNode* c_node
cdef xmlNode* c_next
cdef xmlDoc* c_source_doc
+ _assertValidNode(self)
+ _assertValidNode(element)
c_node = _findChild(self._c_node, index)
if c_node is NULL:
_appendChild(self, element)
@@ -799,6 +815,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
cdef xmlNode* c_node
cdef xmlNode* c_next
+ _assertValidNode(self)
+ _assertValidNode(element)
c_node = element._c_node
if c_node.parent is not self._c_node:
raise ValueError, u"Element is not a child of this node."
@@ -819,6 +837,9 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlNode* c_new_node
cdef xmlNode* c_new_next
cdef xmlDoc* c_source_doc
+ _assertValidNode(self)
+ _assertValidNode(old_element)
+ _assertValidNode(new_element)
c_old_node = old_element._c_node
if c_old_node.parent is not self._c_node:
raise ValueError, u"Element is not a child of this node."
@@ -840,11 +861,13 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
def __get__(self):
if self._tag is not None:
return self._tag
+ _assertValidNode(self)
self._tag = _namespacedName(self._c_node)
return self._tag
def __set__(self, value):
cdef _BaseParser parser
+ _assertValidNode(self)
ns, name = _getNsTag(value)
parser = self._doc._parser
if parser is not None and parser._for_html:
@@ -863,6 +886,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
keys(), values() and items() to access element attributes.
"""
def __get__(self):
+ _assertValidNode(self)
return _Attrib(self)
## cdef python.PyObject* ref
## if self._attrib is not None:
@@ -878,9 +902,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
the value None, if there was no text.
"""
def __get__(self):
+ _assertValidNode(self)
return _collectText(self._c_node.children)
def __set__(self, value):
+ _assertValidNode(self)
if isinstance(value, QName):
value = python.PyUnicode_FromEncodedObject(
_resolveQNameText(self, value), 'UTF-8', 'strict')
@@ -896,9 +922,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
there was no text.
"""
def __get__(self):
+ _assertValidNode(self)
return _collectText(self._c_node.next)
def __set__(self, value):
+ _assertValidNode(self)
_setTailText(self._c_node, value)
# using 'del el.tail' is the wrong thing to do
@@ -921,6 +949,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
def __get__(self):
cdef long line
+ _assertValidNode(self)
line = tree.xmlGetLineNo(self._c_node)
if line > 0:
return line
@@ -928,6 +957,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
return None
def __set__(self, line):
+ _assertValidNode(self)
if line < 0:
self._c_node.line = 0
else:
@@ -945,6 +975,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef xmlNode* c_node
cdef xmlNs* c_ns
cdef dict nsmap = {}
+ _assertValidNode(self)
c_node = self._c_node
while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
c_ns = c_node.nsDef
@@ -973,6 +1004,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""
def __get__(self):
cdef char* c_base
+ _assertValidNode(self)
c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node)
if c_base is NULL:
if self._doc._c_doc.URL is NULL:
@@ -981,8 +1013,10 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
base = _decodeFilename(c_base)
tree.xmlFree(c_base)
return base
+
def __set__(self, url):
cdef char* c_base
+ _assertValidNode(self)
if url is None:
c_base = NULL
else:
@@ -1004,6 +1038,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef Py_ssize_t c, i
cdef _node_to_node_function next_element
cdef list result
+ _assertValidNode(self)
if python.PySlice_Check(x):
# slicing
if _isFullSlice(<python.slice>x):
@@ -1036,6 +1071,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Returns the number of subelements.
"""
+ _assertValidNode(self)
return _countElements(self._c_node.children)
def __nonzero__(self):
@@ -1047,11 +1083,13 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
FutureWarning
)
# emulate old behaviour
+ _assertValidNode(self)
return _hasChild(self._c_node)
def __contains__(self, element):
u"__contains__(self, element)"
cdef xmlNode* c_node
+ _assertValidNode(self)
if not isinstance(element, _Element):
return 0
c_node = (<_Element>element)._c_node
@@ -1076,6 +1114,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef Py_ssize_t c_start, c_stop
cdef xmlNode* c_child
cdef xmlNode* c_start_node
+ _assertValidNode(self)
+ _assertValidNode(child)
c_child = child._c_node
if c_child.parent is not self._c_node:
raise ValueError, u"Element is not a child of this node."
@@ -1155,6 +1195,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Gets an element attribute.
"""
+ _assertValidNode(self)
return _getAttributeValue(self, key, default)
def keys(self):
@@ -1163,6 +1204,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Gets a list of attribute names. The names are returned in an
arbitrary order (just like for an ordinary Python dictionary).
"""
+ _assertValidNode(self)
return _collectAttributes(self._c_node, 1)
def values(self):
@@ -1171,6 +1213,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Gets element attribute values as a sequence of strings. The
attributes are returned in an arbitrary order.
"""
+ _assertValidNode(self)
return _collectAttributes(self._c_node, 2)
def items(self):
@@ -1179,6 +1222,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Gets element attributes, as a sequence. The attributes are returned in
an arbitrary order.
"""
+ _assertValidNode(self)
return _collectAttributes(self._c_node, 3)
def getchildren(self):
@@ -1191,6 +1235,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
ElementTree 1.3 and lxml 2.0. New code should use
``list(element)`` or simply iterate over elements.
"""
+ _assertValidNode(self)
return _collectChildren(self)
def getparent(self):
@@ -1199,6 +1244,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Returns the parent of this element or None for the root element.
"""
cdef xmlNode* c_node
+ #_assertValidNode(self) # not needed
c_node = _parentElement(self._c_node)
if c_node is NULL:
return None
@@ -1210,6 +1256,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Returns the following sibling of this element or None.
"""
cdef xmlNode* c_node
+ #_assertValidNode(self) # not needed
c_node = _nextElement(self._c_node)
if c_node is NULL:
return None
@@ -1221,6 +1268,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Returns the preceding sibling of this element or None.
"""
cdef xmlNode* c_node
+ #_assertValidNode(self) # not needed
c_node = _previousElement(self._c_node)
if c_node is NULL:
return None
@@ -1282,6 +1330,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
This is the same as following element.getparent() up the tree until it
returns None (for the root element) and then build an ElementTree for
the last parent that was returned."""
+ _assertValidDoc(self._doc)
return _elementTreeFactory(self._doc, None)
def getiterator(self, tag=None):
@@ -1339,6 +1388,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
Creates a new element associated with the same document.
"""
+ _assertValidDoc(self._doc)
return _makeElement(_tag, NULL, self._doc, None, None, None,
attrib, nsmap, _extra)
@@ -1408,7 +1458,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
cdef extern from "etree_defs.h":
# macro call to 't->tp_new()' for fast instantiation
- cdef _Element NEW_ELEMENT "PY_NEW" (object t)
+ cdef object NEW_ELEMENT "PY_NEW" (object t)
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
cdef _Element result
@@ -1461,6 +1511,7 @@ cdef class __ContentOnlyElement(_Element):
property text:
def __get__(self):
+ _assertValidNode(self)
if self._c_node.content is NULL:
return ''
else:
@@ -1469,6 +1520,7 @@ cdef class __ContentOnlyElement(_Element):
def __set__(self, value):
cdef tree.xmlDict* c_dict
cdef char* c_text
+ _assertValidNode(self)
if value is None:
c_text = NULL
else:
@@ -1520,9 +1572,11 @@ cdef class _ProcessingInstruction(__ContentOnlyElement):
property target:
# not in ElementTree
def __get__(self):
+ _assertValidNode(self)
return funicode(self._c_node.name)
def __set__(self, value):
+ _assertValidNode(self)
value = _utf8(value)
c_text = _cstr(value)
tree.xmlNodeSetName(self._c_node, c_text)
@@ -1542,9 +1596,11 @@ cdef class _Entity(__ContentOnlyElement):
property name:
# not in ElementTree
def __get__(self):
+ _assertValidNode(self)
return funicode(self._c_node.name)
def __set__(self, value):
+ _assertValidNode(self)
value_utf = _utf8(value)
assert u'&' not in value and u';' not in value, \
u"Invalid entity name '%s'" % value
@@ -1554,6 +1610,7 @@ cdef class _Entity(__ContentOnlyElement):
# FIXME: should this be None or '&[VALUE];' or the resolved
# entity value ?
def __get__(self):
+ _assertValidNode(self)
return u'&%s;' % funicode(self._c_node.name)
def __repr__(self):
@@ -1659,6 +1716,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
Relocate the ElementTree to a new root node.
"""
+ _assertValidNode(root)
if root._c_node.type != tree.XML_ELEMENT_NODE:
raise TypeError, u"Only elements can be the root of an ElementTree"
self._context_node = root
@@ -1693,6 +1751,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
python.PyErr_NoMemory()
return _elementTreeFactory(None, root)
elif self._doc is not None:
+ _assertValidDoc(self._doc)
c_doc = tree.xmlCopyDoc(self._doc._c_doc, 1)
if c_doc is NULL:
python.PyErr_NoMemory()
@@ -1754,6 +1813,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
cdef bint write_declaration
cdef int is_standalone
self._assertHasRoot()
+ _assertValidNode(self._context_node)
if compression is None or compression < 0:
compression = 0
# C14N serialisation
@@ -1797,12 +1857,24 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
Returns a structural, absolute XPath expression to find that element.
"""
cdef _Document doc
+ cdef _Element root
cdef xmlDoc* c_doc
cdef char* c_path
- doc = self._context_node._doc
+ _assertValidNode(element)
+ if self._context_node is not None:
+ root = self._context_node
+ doc = root._doc
+ elif self._doc is not None:
+ doc = self._doc
+ root = doc.getroot()
+ else:
+ raise ValueError, u"Element is not in this tree."
+ _assertValidDoc(doc)
+ _assertValidNode(root)
if element._doc is not doc:
raise ValueError, u"Element is not in this tree."
- c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node)
+
+ c_doc = _fakeRootDoc(doc._c_doc, root._c_node)
c_path = tree.xmlGetNodePath(element._c_node)
_destroyFakeDoc(doc._c_doc, c_doc)
if c_path is NULL:
@@ -2020,7 +2092,6 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
Note that XInclude does not support custom resolvers in Python space
due to restrictions of libxml2 <= 2.6.29.
"""
- cdef int result
self._assertHasRoot()
XInclude()(self._context_node)
@@ -2034,6 +2105,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType,
The ``compression`` option enables GZip compression level 1-9.
"""
self._assertHasRoot()
+ _assertValidNode(self._context_node)
if compression is None or compression < 0:
compression = 0
_tofilelikeC14N(file, self._context_node, exclusive, with_comments,
@@ -2049,7 +2121,10 @@ cdef _ElementTree _newElementTree(_Document doc, _Element context_node,
if context_node is None and doc is not None:
context_node = doc.getroot()
if context_node is None:
+ _assertValidDoc(doc)
result._doc = doc
+ else:
+ _assertValidNode(context_node)
result._context_node = context_node
return result
@@ -2059,6 +2134,7 @@ cdef class _Attrib:
"""
cdef _Element _element
def __cinit__(self, _Element element not None):
+ _assertValidNode(element)
self._element = element
# MANIPULATORS
@@ -2285,8 +2361,9 @@ cdef class ElementChildIterator(_ElementIterator):
u"""ElementChildIterator(self, node, tag=None, reversed=False)
Iterates over the children of an element.
"""
- def __init__(self, _Element node not None, tag=None, *, reversed=False):
+ def __cinit__(self, _Element node not None, tag=None, *, reversed=False):
cdef xmlNode* c_node
+ _assertValidNode(node)
self._initTagMatch(tag)
if reversed:
c_node = _findChildBackwards(node._c_node, 0)
@@ -2310,7 +2387,8 @@ cdef class SiblingsIterator(_ElementIterator):
You can pass the boolean keyword ``preceding`` to specify the direction.
"""
- def __init__(self, _Element node not None, tag=None, *, preceding=False):
+ def __cinit__(self, _Element node not None, tag=None, *, preceding=False):
+ _assertValidNode(node)
self._initTagMatch(tag)
if preceding:
self._next_element = _previousElement
@@ -2322,7 +2400,8 @@ cdef class AncestorsIterator(_ElementIterator):
u"""AncestorsIterator(self, node, tag=None)
Iterates over the ancestors of an element (from parent to parent).
"""
- def __init__(self, _Element node not None, tag=None):
+ def __cinit__(self, _Element node not None, tag=None):
+ _assertValidNode(node)
self._initTagMatch(tag)
self._next_element = _parentElement
self._storeNext(node)
@@ -2351,7 +2430,8 @@ cdef class ElementDepthFirstIterator(_ElementTagMatcher):
# keep next node to return and the (s)top node
cdef _Element _next_node
cdef _Element _top_node
- def __init__(self, _Element node not None, tag=None, *, inclusive=True):
+ def __cinit__(self, _Element node not None, tag=None, *, inclusive=True):
+ _assertValidNode(node)
self._top_node = node
self._next_node = node
self._initTagMatch(tag)
@@ -2417,7 +2497,8 @@ cdef class ElementTextIterator:
"""
cdef object _nextEvent
cdef _Element _start_element
- def __init__(self, _Element element not None, tag=None, *, with_tail=True):
+ def __cinit__(self, _Element element not None, tag=None, *, with_tail=True):
+ _assertValidNode(element)
if with_tail:
events = (u"start", u"end")
else:
@@ -2683,7 +2764,7 @@ def iselement(element):
Checks if an object appears to be a valid element object.
"""
- return isinstance(element, _Element)
+ return isinstance(element, _Element) and (<_Element>element)._c_node is not NULL
def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
u"""dump(elem, pretty_print=True, with_tail=True)
@@ -2691,6 +2772,7 @@ def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
Writes an element tree or element structure to sys.stdout. This function
should be used for debugging only.
"""
+ _assertValidNode(elem)
_dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method=u"xml",
@@ -2900,8 +2982,7 @@ class DocumentInvalid(LxmlError):
cdef class _Validator:
u"Base class for XML validators."
cdef _ErrorLog _error_log
- def __init__(self):
- u"__init__(self)"
+ def __cinit__(self):
self._error_log = _ErrorLog()
def validate(self, etree):
@@ -2943,6 +3024,7 @@ cdef class _Validator:
property error_log:
u"The log of validation errors and warnings."
def __get__(self):
+ assert self._error_log is not None, "XPath evaluator not initialised"
return self._error_log.copy()
include "dtd.pxi" # DTD
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index e85d927b..5f027baa 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -20,18 +20,18 @@ class ParseError(LxmlSyntaxError):
# Python >= 2.5 uses new style class exceptions
super(_ParseError, self).__init__(message)
else:
- _XMLSyntaxError.__init__(self, message)
+ _LxmlSyntaxError.__init__(self, message)
self.position = (line, column)
self.code = code
+cdef object _LxmlSyntaxError = LxmlSyntaxError
+cdef object _ParseError = ParseError
+
class XMLSyntaxError(ParseError):
u"""Syntax error while parsing an XML document.
"""
pass
-cdef object _XMLSyntaxError = XMLSyntaxError
-cdef object _ParseError = ParseError
-
class ParserError(LxmlError):
u"""Internal lxml parser error.
"""
@@ -51,7 +51,8 @@ cdef class _ParserDictionaryContext:
cdef _BaseParser _default_parser
cdef list _implied_parser_contexts
- def __init__(self):
+ def __cinit__(self):
+ self._c_dict = NULL
self._implied_parser_contexts = []
def __dealloc__(self):
@@ -65,7 +66,7 @@ cdef class _ParserDictionaryContext:
cdef python.PyObject* result
thread_dict = python.PyThreadState_GetDict()
if thread_dict is not NULL:
- (<object>thread_dict)[u"_ParserDictionaryContext"] = self
+ (<dict>thread_dict)[u"_ParserDictionaryContext"] = self
cdef _ParserDictionaryContext _findThreadParserContext(self):
u"Find (or create) the _ParserDictionaryContext object for the current thread"
@@ -75,7 +76,7 @@ cdef class _ParserDictionaryContext:
thread_dict = python.PyThreadState_GetDict()
if thread_dict is NULL:
return self
- d = <object>thread_dict
+ d = <dict>thread_dict
result = python.PyDict_GetItem(d, u"_ParserDictionaryContext")
if result is not NULL:
return <object>result
@@ -264,7 +265,7 @@ cdef class _FileReaderContext:
cdef _ExceptionContext _exc_context
cdef Py_ssize_t _bytes_read
cdef char* _c_url
- def __init__(self, filelike, exc_context, url, encoding):
+ def __cinit__(self, filelike, exc_context, url, encoding):
self._exc_context = exc_context
self._filelike = filelike
self._encoding = encoding
@@ -472,6 +473,13 @@ cdef class _ParserContext(_ResolverContext):
cdef _ParserSchemaValidationContext _validator
cdef xmlparser.xmlParserCtxt* _c_ctxt
cdef python.PyThread_type_lock _lock
+ def __cinit__(self):
+ self._c_ctxt = NULL
+ if not config.ENABLE_THREADING:
+ self._lock = NULL
+ else:
+ self._lock = python.PyThread_allocate_lock()
+ self._error_log = _ErrorLog()
def __dealloc__(self):
if self._validator is not None:
@@ -543,13 +551,8 @@ cdef _initParserContext(_ParserContext context,
_ResolverRegistry resolvers,
xmlparser.xmlParserCtxt* c_ctxt):
_initResolverContext(context, resolvers)
- if not config.ENABLE_THREADING:
- context._lock = NULL
- else:
- context._lock = python.PyThread_allocate_lock()
if c_ctxt is not NULL:
context._initParserContext(c_ctxt)
- context._error_log = _ErrorLog()
cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename,
_ErrorLog error_log) except 0:
@@ -839,6 +842,8 @@ cdef class _BaseParser:
parser._resolvers = self._resolvers
parser.target = self.target
parser._class_lookup = self._class_lookup
+ parser._default_encoding = self._default_encoding
+ parser._schema = self._schema
return parser
def copy(self):
diff --git a/src/lxml/readonlytree.pxi b/src/lxml/readonlytree.pxi
index 838b313c..9caad4a0 100644
--- a/src/lxml/readonlytree.pxi
+++ b/src/lxml/readonlytree.pxi
@@ -6,6 +6,9 @@ cdef class _ReadOnlyProxy:
cdef xmlNode* _c_node
cdef _ReadOnlyProxy _source_proxy
cdef list _dependent_proxies
+ def __cinit__(self):
+ self._c_node = NULL
+ self._free_after_use = 0
cdef int _assertNode(self) except -1:
u"""This is our way of saying: this proxy is invalid!
@@ -329,7 +332,6 @@ cdef _ReadOnlyProxy _newReadOnlyProxy(
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
_ReadOnlyProxy source_proxy):
- el._free_after_use = 0
if source_proxy is None:
el._source_proxy = el
el._dependent_proxies = [el]
diff --git a/src/lxml/relaxng.pxi b/src/lxml/relaxng.pxi
index 9813ca81..fe673e6d 100644
--- a/src/lxml/relaxng.pxi
+++ b/src/lxml/relaxng.pxi
@@ -27,6 +27,9 @@ cdef class RelaxNG(_Validator):
filename through the ``file`` keyword argument.
"""
cdef relaxng.xmlRelaxNG* _c_schema
+ def __cinit__(self):
+ self._c_schema = NULL
+
def __init__(self, etree=None, *, file=None):
cdef _Document doc
cdef _Element root_node
@@ -35,7 +38,6 @@ cdef class RelaxNG(_Validator):
cdef char* c_href
cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt
_Validator.__init__(self)
- self._c_schema = NULL
fake_c_doc = NULL
if etree is not None:
doc = _documentOrRaise(etree)
@@ -103,6 +105,7 @@ cdef class RelaxNG(_Validator):
cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt
cdef int ret
+ assert self._c_schema is not NULL, "RelaxNG instance not initialised"
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
diff --git a/src/lxml/saxparser.pxi b/src/lxml/saxparser.pxi
index f75ea7c7..5c1bb69b 100644
--- a/src/lxml/saxparser.pxi
+++ b/src/lxml/saxparser.pxi
@@ -340,7 +340,7 @@ cdef class TreeBuilder(_SaxParserTarget):
cdef list _data
cdef list _element_stack
cdef object _element_stack_pop
- cdef _Element _last
+ cdef _Element _last # may be None
cdef bint _in_tail
def __init__(self, *, element_factory=None, parser=None):
diff --git a/src/lxml/schematron.pxi b/src/lxml/schematron.pxi
index 0291c8fa..664e757b 100644
--- a/src/lxml/schematron.pxi
+++ b/src/lxml/schematron.pxi
@@ -70,14 +70,16 @@ cdef class Schematron(_Validator):
"""
cdef schematron.xmlSchematron* _c_schema
cdef xmlDoc* _c_schema_doc
+ def __cinit__(self):
+ self._c_schema = NULL
+ self._c_schema_doc = NULL
+
def __init__(self, etree=None, *, file=None):
cdef _Document doc
cdef _Element root_node
cdef xmlNode* c_node
cdef char* c_href
cdef schematron.xmlSchematronParserCtxt* parser_ctxt
- self._c_schema = NULL
- self._c_schema_doc = NULL
_Validator.__init__(self)
if not config.ENABLE_SCHEMATRON:
raise SchematronError, \
@@ -138,6 +140,7 @@ cdef class Schematron(_Validator):
cdef int ret
cdef int options
+ assert self._c_schema is not NULL, "Schematron instance not initialised"
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi
index a4265143..655a566e 100644
--- a/src/lxml/serializer.pxi
+++ b/src/lxml/serializer.pxi
@@ -89,6 +89,7 @@ cdef _tostring(_Element element, encoding, doctype, method,
cdef int error_result
if element is None:
return None
+ _assertValidNode(element)
c_method = _findOutputMethod(method)
if c_method == OUTPUT_METHOD_TEXT:
return _textToString(element._c_node, encoding, with_tail)
@@ -151,10 +152,12 @@ cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments):
cdef _Element element
if isinstance(element_or_tree, _Element):
+ _assertValidNode(<_Element>element_or_tree)
doc = (<_Element>element_or_tree)._doc
c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
else:
doc = _documentOrRaise(element_or_tree)
+ _assertValidDoc(doc)
c_doc = doc._c_doc
with nogil:
@@ -345,7 +348,7 @@ cdef class _FilelikeWriter:
cdef object _close_filelike
cdef _ExceptionContext _exc_context
cdef _ErrorLog error_log
- def __init__(self, filelike, exc_context=None, compression=None):
+ def __cinit__(self, filelike, exc_context=None, compression=None):
if compression is not None and compression > 0:
filelike = gzip.GzipFile(
fileobj=filelike, mode=u'wb', compresslevel=compression)
diff --git a/src/lxml/xinclude.pxi b/src/lxml/xinclude.pxi
index 208d55e6..8080b505 100644
--- a/src/lxml/xinclude.pxi
+++ b/src/lxml/xinclude.pxi
@@ -20,6 +20,7 @@ cdef class XInclude:
property error_log:
def __get__(self):
+ assert self._error_log is not None, "XInclude instance not initialised"
return self._error_log.copy()
def __call__(self, _Element node not None):
@@ -32,6 +33,8 @@ cdef class XInclude:
# typed as elements. The included fragment is added between the two,
# i.e. as a sibling, which does not conflict with traversal.
cdef int result
+ _assertValidNode(node)
+ assert self._error_log is not None, "XInclude instance not initialised"
self._error_log.connect()
__GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(
node._doc._parser)
diff --git a/src/lxml/xmlerror.pxi b/src/lxml/xmlerror.pxi
index a53f87d5..7514b248 100644
--- a/src/lxml/xmlerror.pxi
+++ b/src/lxml/xmlerror.pxi
@@ -494,7 +494,7 @@ cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error) with gil:
cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error) nogil:
# no Python objects here, may be called without thread context !
# when we declare a Python object, Pyrex will INCREF(None) !
- if __DEBUG != 0:
+ if __DEBUG:
_forwardError(c_log_handler, error)
cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...) nogil:
diff --git a/src/lxml/xmlschema.pxi b/src/lxml/xmlschema.pxi
index 981cec6b..1cf35b4f 100644
--- a/src/lxml/xmlschema.pxi
+++ b/src/lxml/xmlschema.pxi
@@ -36,6 +36,10 @@ cdef class XMLSchema(_Validator):
cdef xmlschema.xmlSchema* _c_schema
cdef bint _has_default_attributes
cdef bint _add_attribute_defaults
+ def __cinit__(self):
+ self._c_schema = NULL
+ self._has_default_attributes = True # play safe
+ self._add_attribute_defaults = False
def __init__(self, etree=None, *, file=None, attribute_defaults=False):
cdef _Document doc
@@ -45,9 +49,7 @@ cdef class XMLSchema(_Validator):
cdef char* c_href
cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt
- self._has_default_attributes = True # play safe
self._add_attribute_defaults = attribute_defaults
- self._c_schema = NULL
_Validator.__init__(self)
fake_c_doc = NULL
if etree is not None:
@@ -126,6 +128,7 @@ cdef class XMLSchema(_Validator):
cdef xmlDoc* c_doc
cdef int ret
+ assert self._c_schema is not NULL, "Schema instance not initialised"
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
@@ -161,8 +164,6 @@ cdef class XMLSchema(_Validator):
cdef _ParserSchemaValidationContext context
context = NEW_SCHEMA_CONTEXT(_ParserSchemaValidationContext)
context._schema = self
- context._valid_ctxt = NULL
- context._sax_plug = NULL
context._add_default_attributes = (self._has_default_attributes and (
add_default_attributes or self._add_attribute_defaults))
return context
@@ -172,6 +173,10 @@ cdef class _ParserSchemaValidationContext:
cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt
cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug
cdef bint _add_default_attributes
+ def __cinit__(self):
+ self._valid_ctxt = NULL
+ self._sax_plug = NULL
+ self._add_default_attributes = False
def __dealloc__(self):
self.disconnect()
@@ -179,6 +184,7 @@ cdef class _ParserSchemaValidationContext:
xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt)
cdef _ParserSchemaValidationContext copy(self):
+ assert self._schema is not None, "_ParserSchemaValidationContext not initialised"
return self._schema._newSaxValidator(
self._add_default_attributes)
diff --git a/src/lxml/xpath.pxi b/src/lxml/xpath.pxi
index 60ca3f6b..dc9dffdb 100644
--- a/src/lxml/xpath.pxi
+++ b/src/lxml/xpath.pxi
@@ -130,6 +130,13 @@ cdef class _XPathEvaluatorBase:
cdef _XPathContext _context
cdef python.PyThread_type_lock _eval_lock
cdef _ErrorLog _error_log
+ def __cinit__(self):
+ self._xpathCtxt = NULL
+ if config.ENABLE_THREADING:
+ self._eval_lock = python.PyThread_allocate_lock()
+ if self._eval_lock is NULL:
+ python.PyErr_NoMemory()
+ self._error_log = _ErrorLog()
def __init__(self, namespaces, extensions, enable_regexp,
smart_strings):
@@ -139,17 +146,13 @@ cdef class _XPathEvaluatorBase:
import warnings
warnings.warn(u"This version of libxml2 has a known XPath bug. " + \
u"Use it at your own risk.")
- self._error_log = _ErrorLog()
self._context = _XPathContext(namespaces, extensions,
enable_regexp, None,
smart_strings)
- if config.ENABLE_THREADING:
- self._eval_lock = python.PyThread_allocate_lock()
- if self._eval_lock is NULL:
- python.PyErr_NoMemory()
property error_log:
def __get__(self):
+ assert self._error_log is not None, "XPath evaluator not initialised"
return self._error_log.copy()
def __dealloc__(self):
@@ -195,7 +198,7 @@ cdef class _XPathEvaluatorBase:
result = python.PyThread_acquire_lock(
self._eval_lock, python.WAIT_LOCK)
if result == 0:
- raise ParserError, u"parser locking failed"
+ raise XPathError, u"XPath evaluator locking failed"
return 0
cdef void _unlock(self):
@@ -266,6 +269,8 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase):
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
cdef _Document doc
+ _assertValidNode(element)
+ _assertValidDoc(element._doc)
self._element = element
doc = element._doc
_XPathEvaluatorBase.__init__(self, namespaces, extensions,
@@ -300,6 +305,7 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase):
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
path = _utf8(_path)
doc = self._element._doc
@@ -351,6 +357,7 @@ cdef class XPathDocumentEvaluator(XPathElementEvaluator):
cdef xmlDoc* c_doc
cdef _Document doc
cdef char* c_path
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
path = _utf8(_path)
doc = self._element._doc
@@ -417,6 +424,8 @@ cdef class XPath(_XPathEvaluatorBase):
"""
cdef xpath.xmlXPathCompExpr* _xpath
cdef bytes _path
+ def __cinit__(self):
+ self._xpath = NULL
def __init__(self, path, *, namespaces=None, extensions=None,
regexp=True, smart_strings=True):
@@ -440,6 +449,7 @@ cdef class XPath(_XPathEvaluatorBase):
cdef _Document document
cdef _Element element
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
diff --git a/src/lxml/xslt.pxi b/src/lxml/xslt.pxi
index dab0b568..742fc6a9 100644
--- a/src/lxml/xslt.pxi
+++ b/src/lxml/xslt.pxi
@@ -124,8 +124,7 @@ cdef void _xslt_store_resolver_exception(char* c_uri, void* context,
cdef xmlDoc* _xslt_doc_loader(char* c_uri, tree.xmlDict* c_dict,
int parse_options, void* c_ctxt,
xslt.xsltLoadType c_type) nogil:
- # no Python objects here, may be called without thread context !
- # when we declare a Python object, Pyrex will INCREF(None) !
+ # nogil => no Python objects here, may be called without thread context !
cdef xmlDoc* c_doc
cdef xmlDoc* result
cdef void* c_pcontext
@@ -186,7 +185,7 @@ cdef class XSLTAccessControl:
See `XSLT`.
"""
cdef xslt.xsltSecurityPrefs* _prefs
- def __init__(self, *, read_file=True, write_file=True, create_dir=True,
+ def __cinit__(self, *, read_file=True, write_file=True, create_dir=True,
read_network=True, write_network=True):
self._prefs = xslt.xsltNewSecurityPrefs()
if self._prefs is NULL:
@@ -271,10 +270,12 @@ cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
cdef _ReadOnlyElementProxy _extension_element_proxy
cdef dict _extension_elements
- def __init__(self, namespaces, extensions, enable_regexp,
- build_smart_strings):
+ def __cinit__(self):
self._xsltCtxt = NULL
self._extension_elements = EMPTY_DICT
+
+ def __init__(self, namespaces, extensions, enable_regexp,
+ build_smart_strings):
if extensions is not None and extensions:
for ns_name_tuple, extension in extensions.items():
if ns_name_tuple[0] is None:
@@ -320,7 +321,7 @@ cdef class _XSLTQuotedStringParam:
quote escaping.
"""
cdef bytes strval
- def __init__(self, strval):
+ def __cinit__(self, strval):
self.strval = _utf8(strval)
@@ -356,6 +357,9 @@ cdef class XSLT:
cdef XSLTAccessControl _access_control
cdef _ErrorLog _error_log
+ def __cinit__(self):
+ self._c_style = NULL
+
def __init__(self, xslt_input, *, extensions=None, regexp=True,
access_control=None):
cdef xslt.xsltStylesheet* c_style
@@ -413,7 +417,8 @@ cdef class XSLT:
self._xslt_resolver_context._c_style_doc is not NULL:
tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc)
# this cleans up the doc copy as well
- xslt.xsltFreeStylesheet(self._c_style)
+ if self._c_style is not NULL:
+ xslt.xsltFreeStylesheet(self._c_style)
property error_log:
u"The log of errors and warnings of an XSLT execution."
@@ -477,6 +482,7 @@ cdef class XSLT:
cdef tree.xmlDict* c_dict
cdef char** params
+ assert self._c_style is not NULL, "XSLT stylesheet not initialised"
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
@@ -645,6 +651,7 @@ cdef extern from "etree_defs.h":
cdef XSLT _copyXSLT(XSLT stylesheet):
cdef XSLT new_xslt
cdef xmlDoc* c_doc
+ assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised"
new_xslt = NEW_XSLT(XSLT) # without calling __init__()
new_xslt._access_control = stylesheet._access_control
new_xslt._error_log = _ErrorLog()
@@ -668,6 +675,11 @@ cdef class _XSLTResultTree(_ElementTree):
cdef char* _buffer
cdef Py_ssize_t _buffer_len
cdef Py_ssize_t _buffer_refcnt
+ def __cinit__(self):
+ self._buffer = NULL
+ self._buffer_len = 0
+ self._buffer_refcnt = 0
+
cdef _saveToStringAndSize(self, char** s, int* l):
cdef _Document doc
cdef int r
@@ -719,7 +731,7 @@ cdef class _XSLTResultTree(_ElementTree):
def __getbuffer__(self, Py_buffer* buffer, int flags):
cdef int l
if buffer is NULL:
- return # LOCK
+ return
if self._buffer is NULL or flags & python.PyBUF_WRITABLE:
self._saveToStringAndSize(<char**>&buffer.buf, &l)
buffer.len = l
@@ -748,7 +760,7 @@ cdef class _XSLTResultTree(_ElementTree):
def __releasebuffer__(self, Py_buffer* buffer):
if buffer is NULL:
- return # UNLOCK
+ return
if <char*>buffer.buf is self._buffer:
self._buffer_refcnt -= 1
if self._buffer_refcnt == 0:
@@ -778,9 +790,6 @@ cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile):
result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree)
result._xslt = xslt
result._profile = profile
- result._buffer = NULL
- result._buffer_refcnt = 0
- result._buffer_len = 0
return result
# functions like "output" and "write" are a potential security risk, but we
@@ -831,6 +840,7 @@ cdef class _XSLTProcessingInstruction(PIBase):
cdef _Element result_node
cdef char* c_href
cdef xmlAttr* c_attr
+ _assertValidNode(self)
if self._c_node.content is NULL:
raise ValueError, u"PI lacks content"
hrefs = _FIND_PI_HREF(u' ' + funicode(self._c_node.content))
@@ -852,6 +862,7 @@ cdef class _XSLTProcessingInstruction(PIBase):
# ID reference to embedded stylesheet
# try XML:ID lookup
+ _assertValidDoc(self._doc)
c_href += 1 # skip leading '#'
c_attr = tree.xmlGetID(self._c_node.doc, c_href)
if c_attr is not NULL and c_attr.doc is self._c_node.doc:
diff --git a/src/lxml/xsltext.pxi b/src/lxml/xsltext.pxi
index 436a6b54..207865c9 100644
--- a/src/lxml/xsltext.pxi
+++ b/src/lxml/xsltext.pxi
@@ -37,6 +37,7 @@ cdef class XSLTExtension:
cdef xmlNode* c_parent
cdef xmlNode* c_node
cdef xmlNode* c_context_node
+ assert context._xsltCtxt is not NULL, "XSLT context not initialised"
c_context_node = _roNodeOf(node)
#assert c_context_node.doc is context._xsltContext.node.doc, \
# "switching input documents during transformation is not currently supported"
@@ -80,6 +81,7 @@ cdef class XSLTExtension:
cdef xmlNode* c_parent
cdef xslt.xsltTransformContext* c_ctxt = context._xsltCtxt
cdef xmlNode* c_old_output_parent = c_ctxt.insert
+ assert context._xsltCtxt is not NULL, "XSLT context not initialised"
# output_parent node is used for adding results instead of
# elements list used in apply_templates, that's easier and allows to