diff options
-rw-r--r-- | CHANGES.txt | 3 | ||||
-rw-r--r-- | src/lxml/apihelpers.pxi | 41 | ||||
-rw-r--r-- | src/lxml/docloader.pxi | 7 | ||||
-rw-r--r-- | src/lxml/dtd.pxi | 5 | ||||
-rw-r--r-- | src/lxml/extensions.pxi | 6 | ||||
-rw-r--r-- | src/lxml/iterparse.pxi | 10 | ||||
-rw-r--r-- | src/lxml/lxml.etree.pyx | 128 | ||||
-rw-r--r-- | src/lxml/parser.pxi | 31 | ||||
-rw-r--r-- | src/lxml/readonlytree.pxi | 4 | ||||
-rw-r--r-- | src/lxml/relaxng.pxi | 5 | ||||
-rw-r--r-- | src/lxml/saxparser.pxi | 2 | ||||
-rw-r--r-- | src/lxml/schematron.pxi | 7 | ||||
-rw-r--r-- | src/lxml/serializer.pxi | 5 | ||||
-rw-r--r-- | src/lxml/xinclude.pxi | 3 | ||||
-rw-r--r-- | src/lxml/xmlerror.pxi | 2 | ||||
-rw-r--r-- | src/lxml/xmlschema.pxi | 14 | ||||
-rw-r--r-- | src/lxml/xpath.pxi | 22 | ||||
-rw-r--r-- | src/lxml/xslt.pxi | 35 | ||||
-rw-r--r-- | src/lxml/xsltext.pxi | 2 |
19 files changed, 245 insertions, 87 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 239312ae..520db93e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -77,6 +77,9 @@ Features added Bugs fixed ---------- +* API is hardened against invalid proxy instances to prevent crashes + due to incorrectly instantiated Element instances. + * Prevent crash when instantiating ``CommentBase`` and friends. * Export ElementTree compatible XML parser class as diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index 2e600b44..6ee1cd92 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -11,6 +11,12 @@ cdef void displayNode(xmlNode* c_node, indent): displayNode(c_child, indent + 1) c_child = c_child.next +cdef inline int _assertValidNode(_Element element) except -1: + assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element) + +cdef inline int _assertValidDoc(_Document doc) except -1: + assert doc._c_doc is not NULL, u"invalid Document proxy at %s" % id(doc) + cdef _Document _documentOrRaise(object input): u"""Call this to get the document of a _Document, _ElementTree or _Element object, or to raise an exception if it can't be determined. @@ -33,8 +39,8 @@ cdef _Document _documentOrRaise(object input): if doc is None: raise ValueError, u"Input object has no document: %s" % \ python._fqtypename(input) - else: - return doc + _assertValidDoc(doc) + return doc cdef _Element _rootNodeOrRaise(object input): u"""Call this to get the root node of a _Document, _ElementTree or @@ -55,36 +61,41 @@ cdef _Element _rootNodeOrRaise(object input): if node is None: raise ValueError, u"Input object has no element: %s" % \ python._fqtypename(input) - else: - return node + _assertValidNode(node) + return node cdef _Document _documentOf(object input): # call this to get the document of a # _Document, _ElementTree or _Element object # may return None! cdef _Element element + cdef _Document doc = None if isinstance(input, _ElementTree): element = (<_ElementTree>input)._context_node if element is not None: - return element._doc + doc = element._doc elif isinstance(input, _Element): - return (<_Element>input)._doc + doc = (<_Element>input)._doc elif isinstance(input, _Document): - return <_Document>input - return None + doc = <_Document>input + if doc is not None: + _assertValidDoc(doc) + return doc cdef _Element _rootNodeOf(object input): # call this to get the root node of a # _Document, _ElementTree or _Element object # may return None! + cdef _Element element = None if isinstance(input, _ElementTree): - return (<_ElementTree>input)._context_node + element = (<_ElementTree>input)._context_node elif isinstance(input, _Element): - return <_Element>input + element = <_Element>input elif isinstance(input, _Document): - return (<_Document>input).getroot() - else: - return None + element = (<_Document>input).getroot() + if element is not None: + _assertValidNode(element) + return element cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, _BaseParser parser, text, tail, attrib, nsmap, @@ -183,6 +194,7 @@ cdef _Element _makeSubElement(_Element parent, tag, text, tail, cdef xmlDoc* c_doc if parent is None or parent._doc is None: return None + _assertValidNode(parent) ns_utf, name_utf = _getNsTag(tag) c_doc = parent._doc._c_doc @@ -1181,6 +1193,7 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node, if c_node is not NULL: for element in elements: assert element is not None, u"Node must not be None" + _assertValidNode(element) # move element and tail over c_source_doc = element._c_node.doc c_next = element._c_node.next @@ -1205,10 +1218,12 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node, if left_to_right: for element in elements: assert element is not None, u"Node must not be None" + _assertValidNode(element) _appendChild(parent, element) else: for element in elements: assert element is not None, u"Node must not be None" + _assertValidNode(element) _prependChild(parent, element) return 0 diff --git a/src/lxml/docloader.pxi b/src/lxml/docloader.pxi index dd95e1e8..42409a53 100644 --- a/src/lxml/docloader.pxi +++ b/src/lxml/docloader.pxi @@ -1,6 +1,7 @@ # Custom resolver API ctypedef enum _InputDocumentDataType: + PARSER_DATA_INVALID PARSER_DATA_EMPTY PARSER_DATA_STRING PARSER_DATA_FILENAME @@ -12,6 +13,10 @@ cdef class _InputDocument: cdef object _filename cdef object _file + def __cinit__(self): + self._type = PARSER_DATA_INVALID + + cdef class Resolver: u"This is the base class of all resolvers." def resolve(self, system_url, public_id, context): @@ -101,7 +106,7 @@ cdef class Resolver: cdef class _ResolverRegistry: cdef object _resolvers cdef Resolver _default_resolver - def __init__(self, Resolver default_resolver=None): + def __cinit__(self, Resolver default_resolver=None): self._resolvers = set() self._default_resolver = default_resolver diff --git a/src/lxml/dtd.pxi b/src/lxml/dtd.pxi index 9c617d65..bd081bad 100644 --- a/src/lxml/dtd.pxi +++ b/src/lxml/dtd.pxi @@ -28,8 +28,10 @@ cdef class DTD(_Validator): catalog. """ cdef tree.xmlDtd* _c_dtd - def __init__(self, file=None, *, external_id=None): + def __cinit__(self): self._c_dtd = NULL + + def __init__(self, file=None, *, external_id=None): _Validator.__init__(self) if file is not None: if _isString(file): @@ -69,6 +71,7 @@ cdef class DTD(_Validator): cdef dtdvalid.xmlValidCtxt* valid_ctxt cdef int ret + assert self._c_dtd is not NULL, "DTD not initialised" doc = _documentOrRaise(etree) root_node = _rootNodeOrRaise(etree) diff --git a/src/lxml/extensions.pxi b/src/lxml/extensions.pxi index 120b661b..6712d151 100644 --- a/src/lxml/extensions.pxi +++ b/src/lxml/extensions.pxi @@ -42,6 +42,8 @@ cdef class _BaseContext: cdef _TempStore _temp_refs cdef set _temp_documents cdef _ExceptionContext _exc + def __cinit__(self): + self._xpathCtxt = NULL def __init__(self, namespaces, extensions, enable_regexp, build_smart_strings): @@ -340,7 +342,7 @@ cdef class _BaseContext: """ cdef _Document doc for doc in self._temp_documents: - if doc._c_doc is c_node.doc: + if doc is not None and doc._c_doc is c_node.doc: return doc return None @@ -374,7 +376,7 @@ def Extension(module, function_mapping=None, *, ns=None): cdef class _ExsltRegExp: cdef dict _compile_map - def __init__(self): + def __cinit__(self): self._compile_map = {} cdef _make_string(self, value): diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi index 74f45e17..71c7bf9e 100644 --- a/src/lxml/iterparse.pxi +++ b/src/lxml/iterparse.pxi @@ -79,7 +79,7 @@ cdef class _IterparseContext(_ParserContext): cdef char* _tag_href cdef char* _tag_name - def __init__(self): + def __cinit__(self): self._ns_stack = [] self._pop_ns = self._ns_stack.pop self._node_stack = [] @@ -581,7 +581,7 @@ cdef class iterwalk: cdef _Element node cdef _Element next_node cdef int ns_count - if python.PyList_GET_SIZE(self._events): + if self._events: return self._pop_event(0) ns_count = 0 # find next node @@ -597,7 +597,7 @@ cdef class iterwalk: next_node = None while next_node is None: # back off through parents - self._index = self._index - 1 + self._index -= 1 node = self._end_node() if self._index < 0: break @@ -609,8 +609,8 @@ cdef class iterwalk: elif self._event_filter & ITERPARSE_FILTER_END_NS: ns_count = _countNsDefs(next_node._c_node) self._node_stack.append( (next_node, ns_count) ) - self._index = self._index + 1 - if python.PyList_GET_SIZE(self._events): + self._index += 1 + if self._events: return self._pop_event(0) raise StopIteration diff --git a/src/lxml/lxml.etree.pyx b/src/lxml/lxml.etree.pyx index 0904f7a2..58a77ee9 100644 --- a/src/lxml/lxml.etree.pyx +++ b/src/lxml/lxml.etree.pyx @@ -171,7 +171,7 @@ class LxmlError(Error): def __init__(self, message, error_log=None): if python.PY_VERSION_HEX >= 0x02050000: # Python >= 2.5 uses new style class exceptions - super(_LxmlError, self).__init__(message) + super(_Error, self).__init__(message) else: error_super_init(self, message) if error_log is None: @@ -179,7 +179,7 @@ class LxmlError(Error): else: self.error_log = error_log.copy() -cdef object _LxmlError = LxmlError +cdef object _Error = Error cdef object error_super_init = Error.__init__ @@ -326,7 +326,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: cdef bint hasdoctype(self): # DOCTYPE gets parsed into internal subset (xmlDTD*) - return self._c_doc.intSubset is not NULL + return self._c_doc is not NULL and self._c_doc.intSubset is not NULL cdef getdoctype(self): # get doctype info: root tag, public/system ID (or None if not known) @@ -355,8 +355,7 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: cdef getxmlinfo(self): # return XML version and encoding (or None if not known) - cdef xmlDoc* c_doc - c_doc = self._c_doc + cdef xmlDoc* c_doc = self._c_doc if c_doc.version is NULL: version = None else: @@ -377,8 +376,8 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: cdef buildNewPrefix(self): # get a new unique prefix ("nsX") for this document - if self._ns_counter < python.PyTuple_GET_SIZE(_PREFIX_CACHE): - ns = python.PyTuple_GET_ITEM(_PREFIX_CACHE, self._ns_counter) + if self._ns_counter < len(_PREFIX_CACHE): + ns = _PREFIX_CACHE[self._ns_counter] python.Py_INCREF(ns) else: ns = python.PyBytes_FromFormat("ns%d", self._ns_counter) @@ -444,12 +443,12 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: c_ns = self._findOrBuildNodeNs(c_node, href, NULL, 0) tree.xmlSetNs(c_node, c_ns) -cdef __initPrefixCache(): +cdef tuple __initPrefixCache(): cdef int i return tuple([ python.PyBytes_FromFormat("ns%d", i) for i in range(30) ]) -cdef object _PREFIX_CACHE +cdef tuple _PREFIX_CACHE _PREFIX_CACHE = __initPrefixCache() cdef extern from "etree_defs.h": @@ -607,6 +606,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef _Element element cdef bint left_to_right cdef Py_ssize_t slicelength, step + _assertValidNode(self) if value is None: raise ValueError, u"cannot assign None" if python.PySlice_Check(x): @@ -622,6 +622,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: else: # otherwise: normal item assignment element = value + _assertValidNode(element) c_node = _findChild(self._c_node, x) if c_node is NULL: raise IndexError, u"list index out of range" @@ -642,6 +643,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlNode* c_node cdef xmlNode* c_next cdef Py_ssize_t step, slicelength + _assertValidNode(self) if python.PySlice_Check(x): # slice deletion if _isFullSlice(<python.slice>x): @@ -673,6 +675,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlDoc* c_doc cdef xmlNode* c_node cdef _Document new_doc + _assertValidNode(self) c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive new_doc = _documentFactory(c_doc, self._doc._parser) root = new_doc.getroot() @@ -691,6 +694,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Sets an element attribute. """ + _assertValidNode(self) _setAttributeValue(self, key, value) def append(self, _Element element not None): @@ -698,9 +702,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Adds a subelement to the end of this element. """ + _assertValidNode(self) + _assertValidNode(element) _appendChild(self, element) - def addnext(self, _Element element): + def addnext(self, _Element element not None): u"""addnext(self, element) Adds the element as a following sibling directly after this @@ -710,6 +716,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: the root node of a document. Note that tail text is automatically discarded when adding at the root level. """ + _assertValidNode(self) + _assertValidNode(element) if self._c_node.parent != NULL and not _isElement(self._c_node.parent): if element._c_node.type != tree.XML_PI_NODE: if element._c_node.type != tree.XML_COMMENT_NODE: @@ -717,7 +725,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: element.tail = None _appendSibling(self, element) - def addprevious(self, _Element element): + def addprevious(self, _Element element not None): u"""addprevious(self, element) Adds the element as a preceding sibling directly before this @@ -727,6 +735,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: before the root node of a document. Note that tail text is automatically discarded when adding at the root level. """ + _assertValidNode(self) + _assertValidNode(element) if self._c_node.parent != NULL and not _isElement(self._c_node.parent): if element._c_node.type != tree.XML_PI_NODE: if element._c_node.type != tree.XML_COMMENT_NODE: @@ -739,7 +749,10 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Extends the current children by the elements in the iterable. """ + _assertValidNode(self) for element in elements: + assert element is not None, u"Node must not be None" + _assertValidNode(element) _appendChild(self, element) def clear(self): @@ -752,6 +765,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlAttr* c_attr_next cdef xmlNode* c_node cdef xmlNode* c_node_next + _assertValidNode(self) c_node = self._c_node # remove self.text and self.tail _removeText(c_node.children) @@ -780,6 +794,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlNode* c_node cdef xmlNode* c_next cdef xmlDoc* c_source_doc + _assertValidNode(self) + _assertValidNode(element) c_node = _findChild(self._c_node, index) if c_node is NULL: _appendChild(self, element) @@ -799,6 +815,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: """ cdef xmlNode* c_node cdef xmlNode* c_next + _assertValidNode(self) + _assertValidNode(element) c_node = element._c_node if c_node.parent is not self._c_node: raise ValueError, u"Element is not a child of this node." @@ -819,6 +837,9 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlNode* c_new_node cdef xmlNode* c_new_next cdef xmlDoc* c_source_doc + _assertValidNode(self) + _assertValidNode(old_element) + _assertValidNode(new_element) c_old_node = old_element._c_node if c_old_node.parent is not self._c_node: raise ValueError, u"Element is not a child of this node." @@ -840,11 +861,13 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: def __get__(self): if self._tag is not None: return self._tag + _assertValidNode(self) self._tag = _namespacedName(self._c_node) return self._tag def __set__(self, value): cdef _BaseParser parser + _assertValidNode(self) ns, name = _getNsTag(value) parser = self._doc._parser if parser is not None and parser._for_html: @@ -863,6 +886,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: keys(), values() and items() to access element attributes. """ def __get__(self): + _assertValidNode(self) return _Attrib(self) ## cdef python.PyObject* ref ## if self._attrib is not None: @@ -878,9 +902,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: the value None, if there was no text. """ def __get__(self): + _assertValidNode(self) return _collectText(self._c_node.children) def __set__(self, value): + _assertValidNode(self) if isinstance(value, QName): value = python.PyUnicode_FromEncodedObject( _resolveQNameText(self, value), 'UTF-8', 'strict') @@ -896,9 +922,11 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: there was no text. """ def __get__(self): + _assertValidNode(self) return _collectText(self._c_node.next) def __set__(self, value): + _assertValidNode(self) _setTailText(self._c_node, value) # using 'del el.tail' is the wrong thing to do @@ -921,6 +949,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: """ def __get__(self): cdef long line + _assertValidNode(self) line = tree.xmlGetLineNo(self._c_node) if line > 0: return line @@ -928,6 +957,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: return None def __set__(self, line): + _assertValidNode(self) if line < 0: self._c_node.line = 0 else: @@ -945,6 +975,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef xmlNode* c_node cdef xmlNs* c_ns cdef dict nsmap = {} + _assertValidNode(self) c_node = self._c_node while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: c_ns = c_node.nsDef @@ -973,6 +1004,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: """ def __get__(self): cdef char* c_base + _assertValidNode(self) c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node) if c_base is NULL: if self._doc._c_doc.URL is NULL: @@ -981,8 +1013,10 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: base = _decodeFilename(c_base) tree.xmlFree(c_base) return base + def __set__(self, url): cdef char* c_base + _assertValidNode(self) if url is None: c_base = NULL else: @@ -1004,6 +1038,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef Py_ssize_t c, i cdef _node_to_node_function next_element cdef list result + _assertValidNode(self) if python.PySlice_Check(x): # slicing if _isFullSlice(<python.slice>x): @@ -1036,6 +1071,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Returns the number of subelements. """ + _assertValidNode(self) return _countElements(self._c_node.children) def __nonzero__(self): @@ -1047,11 +1083,13 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: FutureWarning ) # emulate old behaviour + _assertValidNode(self) return _hasChild(self._c_node) def __contains__(self, element): u"__contains__(self, element)" cdef xmlNode* c_node + _assertValidNode(self) if not isinstance(element, _Element): return 0 c_node = (<_Element>element)._c_node @@ -1076,6 +1114,8 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef Py_ssize_t c_start, c_stop cdef xmlNode* c_child cdef xmlNode* c_start_node + _assertValidNode(self) + _assertValidNode(child) c_child = child._c_node if c_child.parent is not self._c_node: raise ValueError, u"Element is not a child of this node." @@ -1155,6 +1195,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Gets an element attribute. """ + _assertValidNode(self) return _getAttributeValue(self, key, default) def keys(self): @@ -1163,6 +1204,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Gets a list of attribute names. The names are returned in an arbitrary order (just like for an ordinary Python dictionary). """ + _assertValidNode(self) return _collectAttributes(self._c_node, 1) def values(self): @@ -1171,6 +1213,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Gets element attribute values as a sequence of strings. The attributes are returned in an arbitrary order. """ + _assertValidNode(self) return _collectAttributes(self._c_node, 2) def items(self): @@ -1179,6 +1222,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ + _assertValidNode(self) return _collectAttributes(self._c_node, 3) def getchildren(self): @@ -1191,6 +1235,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: ElementTree 1.3 and lxml 2.0. New code should use ``list(element)`` or simply iterate over elements. """ + _assertValidNode(self) return _collectChildren(self) def getparent(self): @@ -1199,6 +1244,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Returns the parent of this element or None for the root element. """ cdef xmlNode* c_node + #_assertValidNode(self) # not needed c_node = _parentElement(self._c_node) if c_node is NULL: return None @@ -1210,6 +1256,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Returns the following sibling of this element or None. """ cdef xmlNode* c_node + #_assertValidNode(self) # not needed c_node = _nextElement(self._c_node) if c_node is NULL: return None @@ -1221,6 +1268,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Returns the preceding sibling of this element or None. """ cdef xmlNode* c_node + #_assertValidNode(self) # not needed c_node = _previousElement(self._c_node) if c_node is NULL: return None @@ -1282,6 +1330,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: This is the same as following element.getparent() up the tree until it returns None (for the root element) and then build an ElementTree for the last parent that was returned.""" + _assertValidDoc(self._doc) return _elementTreeFactory(self._doc, None) def getiterator(self, tag=None): @@ -1339,6 +1388,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: Creates a new element associated with the same document. """ + _assertValidDoc(self._doc) return _makeElement(_tag, NULL, self._doc, None, None, None, attrib, nsmap, _extra) @@ -1408,7 +1458,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for fast instantiation - cdef _Element NEW_ELEMENT "PY_NEW" (object t) + cdef object NEW_ELEMENT "PY_NEW" (object t) cdef _Element _elementFactory(_Document doc, xmlNode* c_node): cdef _Element result @@ -1461,6 +1511,7 @@ cdef class __ContentOnlyElement(_Element): property text: def __get__(self): + _assertValidNode(self) if self._c_node.content is NULL: return '' else: @@ -1469,6 +1520,7 @@ cdef class __ContentOnlyElement(_Element): def __set__(self, value): cdef tree.xmlDict* c_dict cdef char* c_text + _assertValidNode(self) if value is None: c_text = NULL else: @@ -1520,9 +1572,11 @@ cdef class _ProcessingInstruction(__ContentOnlyElement): property target: # not in ElementTree def __get__(self): + _assertValidNode(self) return funicode(self._c_node.name) def __set__(self, value): + _assertValidNode(self) value = _utf8(value) c_text = _cstr(value) tree.xmlNodeSetName(self._c_node, c_text) @@ -1542,9 +1596,11 @@ cdef class _Entity(__ContentOnlyElement): property name: # not in ElementTree def __get__(self): + _assertValidNode(self) return funicode(self._c_node.name) def __set__(self, value): + _assertValidNode(self) value_utf = _utf8(value) assert u'&' not in value and u';' not in value, \ u"Invalid entity name '%s'" % value @@ -1554,6 +1610,7 @@ cdef class _Entity(__ContentOnlyElement): # FIXME: should this be None or '&[VALUE];' or the resolved # entity value ? def __get__(self): + _assertValidNode(self) return u'&%s;' % funicode(self._c_node.name) def __repr__(self): @@ -1659,6 +1716,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType, Relocate the ElementTree to a new root node. """ + _assertValidNode(root) if root._c_node.type != tree.XML_ELEMENT_NODE: raise TypeError, u"Only elements can be the root of an ElementTree" self._context_node = root @@ -1693,6 +1751,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType, python.PyErr_NoMemory() return _elementTreeFactory(None, root) elif self._doc is not None: + _assertValidDoc(self._doc) c_doc = tree.xmlCopyDoc(self._doc._c_doc, 1) if c_doc is NULL: python.PyErr_NoMemory() @@ -1754,6 +1813,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType, cdef bint write_declaration cdef int is_standalone self._assertHasRoot() + _assertValidNode(self._context_node) if compression is None or compression < 0: compression = 0 # C14N serialisation @@ -1797,12 +1857,24 @@ cdef public class _ElementTree [ type LxmlElementTreeType, Returns a structural, absolute XPath expression to find that element. """ cdef _Document doc + cdef _Element root cdef xmlDoc* c_doc cdef char* c_path - doc = self._context_node._doc + _assertValidNode(element) + if self._context_node is not None: + root = self._context_node + doc = root._doc + elif self._doc is not None: + doc = self._doc + root = doc.getroot() + else: + raise ValueError, u"Element is not in this tree." + _assertValidDoc(doc) + _assertValidNode(root) if element._doc is not doc: raise ValueError, u"Element is not in this tree." - c_doc = _fakeRootDoc(doc._c_doc, self._context_node._c_node) + + c_doc = _fakeRootDoc(doc._c_doc, root._c_node) c_path = tree.xmlGetNodePath(element._c_node) _destroyFakeDoc(doc._c_doc, c_doc) if c_path is NULL: @@ -2020,7 +2092,6 @@ cdef public class _ElementTree [ type LxmlElementTreeType, Note that XInclude does not support custom resolvers in Python space due to restrictions of libxml2 <= 2.6.29. """ - cdef int result self._assertHasRoot() XInclude()(self._context_node) @@ -2034,6 +2105,7 @@ cdef public class _ElementTree [ type LxmlElementTreeType, The ``compression`` option enables GZip compression level 1-9. """ self._assertHasRoot() + _assertValidNode(self._context_node) if compression is None or compression < 0: compression = 0 _tofilelikeC14N(file, self._context_node, exclusive, with_comments, @@ -2049,7 +2121,10 @@ cdef _ElementTree _newElementTree(_Document doc, _Element context_node, if context_node is None and doc is not None: context_node = doc.getroot() if context_node is None: + _assertValidDoc(doc) result._doc = doc + else: + _assertValidNode(context_node) result._context_node = context_node return result @@ -2059,6 +2134,7 @@ cdef class _Attrib: """ cdef _Element _element def __cinit__(self, _Element element not None): + _assertValidNode(element) self._element = element # MANIPULATORS @@ -2285,8 +2361,9 @@ cdef class ElementChildIterator(_ElementIterator): u"""ElementChildIterator(self, node, tag=None, reversed=False) Iterates over the children of an element. """ - def __init__(self, _Element node not None, tag=None, *, reversed=False): + def __cinit__(self, _Element node not None, tag=None, *, reversed=False): cdef xmlNode* c_node + _assertValidNode(node) self._initTagMatch(tag) if reversed: c_node = _findChildBackwards(node._c_node, 0) @@ -2310,7 +2387,8 @@ cdef class SiblingsIterator(_ElementIterator): You can pass the boolean keyword ``preceding`` to specify the direction. """ - def __init__(self, _Element node not None, tag=None, *, preceding=False): + def __cinit__(self, _Element node not None, tag=None, *, preceding=False): + _assertValidNode(node) self._initTagMatch(tag) if preceding: self._next_element = _previousElement @@ -2322,7 +2400,8 @@ cdef class AncestorsIterator(_ElementIterator): u"""AncestorsIterator(self, node, tag=None) Iterates over the ancestors of an element (from parent to parent). """ - def __init__(self, _Element node not None, tag=None): + def __cinit__(self, _Element node not None, tag=None): + _assertValidNode(node) self._initTagMatch(tag) self._next_element = _parentElement self._storeNext(node) @@ -2351,7 +2430,8 @@ cdef class ElementDepthFirstIterator(_ElementTagMatcher): # keep next node to return and the (s)top node cdef _Element _next_node cdef _Element _top_node - def __init__(self, _Element node not None, tag=None, *, inclusive=True): + def __cinit__(self, _Element node not None, tag=None, *, inclusive=True): + _assertValidNode(node) self._top_node = node self._next_node = node self._initTagMatch(tag) @@ -2417,7 +2497,8 @@ cdef class ElementTextIterator: """ cdef object _nextEvent cdef _Element _start_element - def __init__(self, _Element element not None, tag=None, *, with_tail=True): + def __cinit__(self, _Element element not None, tag=None, *, with_tail=True): + _assertValidNode(element) if with_tail: events = (u"start", u"end") else: @@ -2683,7 +2764,7 @@ def iselement(element): Checks if an object appears to be a valid element object. """ - return isinstance(element, _Element) + return isinstance(element, _Element) and (<_Element>element)._c_node is not NULL def dump(_Element elem not None, *, pretty_print=True, with_tail=True): u"""dump(elem, pretty_print=True, with_tail=True) @@ -2691,6 +2772,7 @@ def dump(_Element elem not None, *, pretty_print=True, with_tail=True): Writes an element tree or element structure to sys.stdout. This function should be used for debugging only. """ + _assertValidNode(elem) _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail) def tostring(element_or_tree, *, encoding=None, method=u"xml", @@ -2900,8 +2982,7 @@ class DocumentInvalid(LxmlError): cdef class _Validator: u"Base class for XML validators." cdef _ErrorLog _error_log - def __init__(self): - u"__init__(self)" + def __cinit__(self): self._error_log = _ErrorLog() def validate(self, etree): @@ -2943,6 +3024,7 @@ cdef class _Validator: property error_log: u"The log of validation errors and warnings." def __get__(self): + assert self._error_log is not None, "XPath evaluator not initialised" return self._error_log.copy() include "dtd.pxi" # DTD diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi index e85d927b..5f027baa 100644 --- a/src/lxml/parser.pxi +++ b/src/lxml/parser.pxi @@ -20,18 +20,18 @@ class ParseError(LxmlSyntaxError): # Python >= 2.5 uses new style class exceptions super(_ParseError, self).__init__(message) else: - _XMLSyntaxError.__init__(self, message) + _LxmlSyntaxError.__init__(self, message) self.position = (line, column) self.code = code +cdef object _LxmlSyntaxError = LxmlSyntaxError +cdef object _ParseError = ParseError + class XMLSyntaxError(ParseError): u"""Syntax error while parsing an XML document. """ pass -cdef object _XMLSyntaxError = XMLSyntaxError -cdef object _ParseError = ParseError - class ParserError(LxmlError): u"""Internal lxml parser error. """ @@ -51,7 +51,8 @@ cdef class _ParserDictionaryContext: cdef _BaseParser _default_parser cdef list _implied_parser_contexts - def __init__(self): + def __cinit__(self): + self._c_dict = NULL self._implied_parser_contexts = [] def __dealloc__(self): @@ -65,7 +66,7 @@ cdef class _ParserDictionaryContext: cdef python.PyObject* result thread_dict = python.PyThreadState_GetDict() if thread_dict is not NULL: - (<object>thread_dict)[u"_ParserDictionaryContext"] = self + (<dict>thread_dict)[u"_ParserDictionaryContext"] = self cdef _ParserDictionaryContext _findThreadParserContext(self): u"Find (or create) the _ParserDictionaryContext object for the current thread" @@ -75,7 +76,7 @@ cdef class _ParserDictionaryContext: thread_dict = python.PyThreadState_GetDict() if thread_dict is NULL: return self - d = <object>thread_dict + d = <dict>thread_dict result = python.PyDict_GetItem(d, u"_ParserDictionaryContext") if result is not NULL: return <object>result @@ -264,7 +265,7 @@ cdef class _FileReaderContext: cdef _ExceptionContext _exc_context cdef Py_ssize_t _bytes_read cdef char* _c_url - def __init__(self, filelike, exc_context, url, encoding): + def __cinit__(self, filelike, exc_context, url, encoding): self._exc_context = exc_context self._filelike = filelike self._encoding = encoding @@ -472,6 +473,13 @@ cdef class _ParserContext(_ResolverContext): cdef _ParserSchemaValidationContext _validator cdef xmlparser.xmlParserCtxt* _c_ctxt cdef python.PyThread_type_lock _lock + def __cinit__(self): + self._c_ctxt = NULL + if not config.ENABLE_THREADING: + self._lock = NULL + else: + self._lock = python.PyThread_allocate_lock() + self._error_log = _ErrorLog() def __dealloc__(self): if self._validator is not None: @@ -543,13 +551,8 @@ cdef _initParserContext(_ParserContext context, _ResolverRegistry resolvers, xmlparser.xmlParserCtxt* c_ctxt): _initResolverContext(context, resolvers) - if not config.ENABLE_THREADING: - context._lock = NULL - else: - context._lock = python.PyThread_allocate_lock() if c_ctxt is not NULL: context._initParserContext(c_ctxt) - context._error_log = _ErrorLog() cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename, _ErrorLog error_log) except 0: @@ -839,6 +842,8 @@ cdef class _BaseParser: parser._resolvers = self._resolvers parser.target = self.target parser._class_lookup = self._class_lookup + parser._default_encoding = self._default_encoding + parser._schema = self._schema return parser def copy(self): diff --git a/src/lxml/readonlytree.pxi b/src/lxml/readonlytree.pxi index 838b313c..9caad4a0 100644 --- a/src/lxml/readonlytree.pxi +++ b/src/lxml/readonlytree.pxi @@ -6,6 +6,9 @@ cdef class _ReadOnlyProxy: cdef xmlNode* _c_node cdef _ReadOnlyProxy _source_proxy cdef list _dependent_proxies + def __cinit__(self): + self._c_node = NULL + self._free_after_use = 0 cdef int _assertNode(self) except -1: u"""This is our way of saying: this proxy is invalid! @@ -329,7 +332,6 @@ cdef _ReadOnlyProxy _newReadOnlyProxy( cdef inline _initReadOnlyProxy(_ReadOnlyProxy el, _ReadOnlyProxy source_proxy): - el._free_after_use = 0 if source_proxy is None: el._source_proxy = el el._dependent_proxies = [el] diff --git a/src/lxml/relaxng.pxi b/src/lxml/relaxng.pxi index 9813ca81..fe673e6d 100644 --- a/src/lxml/relaxng.pxi +++ b/src/lxml/relaxng.pxi @@ -27,6 +27,9 @@ cdef class RelaxNG(_Validator): filename through the ``file`` keyword argument. """ cdef relaxng.xmlRelaxNG* _c_schema + def __cinit__(self): + self._c_schema = NULL + def __init__(self, etree=None, *, file=None): cdef _Document doc cdef _Element root_node @@ -35,7 +38,6 @@ cdef class RelaxNG(_Validator): cdef char* c_href cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt _Validator.__init__(self) - self._c_schema = NULL fake_c_doc = NULL if etree is not None: doc = _documentOrRaise(etree) @@ -103,6 +105,7 @@ cdef class RelaxNG(_Validator): cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt cdef int ret + assert self._c_schema is not NULL, "RelaxNG instance not initialised" doc = _documentOrRaise(etree) root_node = _rootNodeOrRaise(etree) diff --git a/src/lxml/saxparser.pxi b/src/lxml/saxparser.pxi index f75ea7c7..5c1bb69b 100644 --- a/src/lxml/saxparser.pxi +++ b/src/lxml/saxparser.pxi @@ -340,7 +340,7 @@ cdef class TreeBuilder(_SaxParserTarget): cdef list _data cdef list _element_stack cdef object _element_stack_pop - cdef _Element _last + cdef _Element _last # may be None cdef bint _in_tail def __init__(self, *, element_factory=None, parser=None): diff --git a/src/lxml/schematron.pxi b/src/lxml/schematron.pxi index 0291c8fa..664e757b 100644 --- a/src/lxml/schematron.pxi +++ b/src/lxml/schematron.pxi @@ -70,14 +70,16 @@ cdef class Schematron(_Validator): """ cdef schematron.xmlSchematron* _c_schema cdef xmlDoc* _c_schema_doc + def __cinit__(self): + self._c_schema = NULL + self._c_schema_doc = NULL + def __init__(self, etree=None, *, file=None): cdef _Document doc cdef _Element root_node cdef xmlNode* c_node cdef char* c_href cdef schematron.xmlSchematronParserCtxt* parser_ctxt - self._c_schema = NULL - self._c_schema_doc = NULL _Validator.__init__(self) if not config.ENABLE_SCHEMATRON: raise SchematronError, \ @@ -138,6 +140,7 @@ cdef class Schematron(_Validator): cdef int ret cdef int options + assert self._c_schema is not NULL, "Schematron instance not initialised" doc = _documentOrRaise(etree) root_node = _rootNodeOrRaise(etree) diff --git a/src/lxml/serializer.pxi b/src/lxml/serializer.pxi index a4265143..655a566e 100644 --- a/src/lxml/serializer.pxi +++ b/src/lxml/serializer.pxi @@ -89,6 +89,7 @@ cdef _tostring(_Element element, encoding, doctype, method, cdef int error_result if element is None: return None + _assertValidNode(element) c_method = _findOutputMethod(method) if c_method == OUTPUT_METHOD_TEXT: return _textToString(element._c_node, encoding, with_tail) @@ -151,10 +152,12 @@ cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments): cdef _Element element if isinstance(element_or_tree, _Element): + _assertValidNode(<_Element>element_or_tree) doc = (<_Element>element_or_tree)._doc c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0) else: doc = _documentOrRaise(element_or_tree) + _assertValidDoc(doc) c_doc = doc._c_doc with nogil: @@ -345,7 +348,7 @@ cdef class _FilelikeWriter: cdef object _close_filelike cdef _ExceptionContext _exc_context cdef _ErrorLog error_log - def __init__(self, filelike, exc_context=None, compression=None): + def __cinit__(self, filelike, exc_context=None, compression=None): if compression is not None and compression > 0: filelike = gzip.GzipFile( fileobj=filelike, mode=u'wb', compresslevel=compression) diff --git a/src/lxml/xinclude.pxi b/src/lxml/xinclude.pxi index 208d55e6..8080b505 100644 --- a/src/lxml/xinclude.pxi +++ b/src/lxml/xinclude.pxi @@ -20,6 +20,7 @@ cdef class XInclude: property error_log: def __get__(self): + assert self._error_log is not None, "XInclude instance not initialised" return self._error_log.copy() def __call__(self, _Element node not None): @@ -32,6 +33,8 @@ cdef class XInclude: # typed as elements. The included fragment is added between the two, # i.e. as a sibling, which does not conflict with traversal. cdef int result + _assertValidNode(node) + assert self._error_log is not None, "XPath evaluator not initialised" self._error_log.connect() __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser( node._doc._parser) diff --git a/src/lxml/xmlerror.pxi b/src/lxml/xmlerror.pxi index a53f87d5..7514b248 100644 --- a/src/lxml/xmlerror.pxi +++ b/src/lxml/xmlerror.pxi @@ -494,7 +494,7 @@ cdef void _forwardError(void* c_log_handler, xmlerror.xmlError* error) with gil: cdef void _receiveError(void* c_log_handler, xmlerror.xmlError* error) nogil: # no Python objects here, may be called without thread context ! # when we declare a Python object, Pyrex will INCREF(None) ! - if __DEBUG != 0: + if __DEBUG: _forwardError(c_log_handler, error) cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...) nogil: diff --git a/src/lxml/xmlschema.pxi b/src/lxml/xmlschema.pxi index 981cec6b..1cf35b4f 100644 --- a/src/lxml/xmlschema.pxi +++ b/src/lxml/xmlschema.pxi @@ -36,6 +36,10 @@ cdef class XMLSchema(_Validator): cdef xmlschema.xmlSchema* _c_schema cdef bint _has_default_attributes cdef bint _add_attribute_defaults + def __cinit__(self): + self._c_schema = NULL + self._has_default_attributes = True # play safe + self._add_attribute_defaults = False def __init__(self, etree=None, *, file=None, attribute_defaults=False): cdef _Document doc @@ -45,9 +49,7 @@ cdef class XMLSchema(_Validator): cdef char* c_href cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt - self._has_default_attributes = True # play safe self._add_attribute_defaults = attribute_defaults - self._c_schema = NULL _Validator.__init__(self) fake_c_doc = NULL if etree is not None: @@ -126,6 +128,7 @@ cdef class XMLSchema(_Validator): cdef xmlDoc* c_doc cdef int ret + assert self._c_schema is not NULL, "Schema instance not initialised" doc = _documentOrRaise(etree) root_node = _rootNodeOrRaise(etree) @@ -161,8 +164,6 @@ cdef class XMLSchema(_Validator): cdef _ParserSchemaValidationContext context context = NEW_SCHEMA_CONTEXT(_ParserSchemaValidationContext) context._schema = self - context._valid_ctxt = NULL - context._sax_plug = NULL context._add_default_attributes = (self._has_default_attributes and ( add_default_attributes or self._add_attribute_defaults)) return context @@ -172,6 +173,10 @@ cdef class _ParserSchemaValidationContext: cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug cdef bint _add_default_attributes + def __cinit__(self): + self._valid_ctxt = NULL + self._sax_plug = NULL + self._add_default_attributes = False def __dealloc__(self): self.disconnect() @@ -179,6 +184,7 @@ cdef class _ParserSchemaValidationContext: xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt) cdef _ParserSchemaValidationContext copy(self): + assert self._schema is not None, "_ParserSchemaValidationContext not initialised" return self._schema._newSaxValidator( self._add_default_attributes) diff --git a/src/lxml/xpath.pxi b/src/lxml/xpath.pxi index 60ca3f6b..dc9dffdb 100644 --- a/src/lxml/xpath.pxi +++ b/src/lxml/xpath.pxi @@ -130,6 +130,13 @@ cdef class _XPathEvaluatorBase: cdef _XPathContext _context cdef python.PyThread_type_lock _eval_lock cdef _ErrorLog _error_log + def __cinit__(self): + self._xpathCtxt = NULL + if config.ENABLE_THREADING: + self._eval_lock = python.PyThread_allocate_lock() + if self._eval_lock is NULL: + python.PyErr_NoMemory() + self._error_log = _ErrorLog() def __init__(self, namespaces, extensions, enable_regexp, smart_strings): @@ -139,17 +146,13 @@ cdef class _XPathEvaluatorBase: import warnings warnings.warn(u"This version of libxml2 has a known XPath bug. " + \ u"Use it at your own risk.") - self._error_log = _ErrorLog() self._context = _XPathContext(namespaces, extensions, enable_regexp, None, smart_strings) - if config.ENABLE_THREADING: - self._eval_lock = python.PyThread_allocate_lock() - if self._eval_lock is NULL: - python.PyErr_NoMemory() property error_log: def __get__(self): + assert self._error_log is not None, "XPath evaluator not initialised" return self._error_log.copy() def __dealloc__(self): @@ -195,7 +198,7 @@ cdef class _XPathEvaluatorBase: result = python.PyThread_acquire_lock( self._eval_lock, python.WAIT_LOCK) if result == 0: - raise ParserError, u"parser locking failed" + raise XPathError, u"XPath evaluator locking failed" return 0 cdef void _unlock(self): @@ -266,6 +269,8 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc + _assertValidNode(element) + _assertValidDoc(element._doc) self._element = element doc = element._doc _XPathEvaluatorBase.__init__(self, namespaces, extensions, @@ -300,6 +305,7 @@ cdef class XPathElementEvaluator(_XPathEvaluatorBase): cdef xpath.xmlXPathObject* xpathObj cdef _Document doc cdef char* c_path + assert self._xpathCtxt is not NULL, "XPath context not initialised" path = _utf8(_path) doc = self._element._doc @@ -351,6 +357,7 @@ cdef class XPathDocumentEvaluator(XPathElementEvaluator): cdef xmlDoc* c_doc cdef _Document doc cdef char* c_path + assert self._xpathCtxt is not NULL, "XPath context not initialised" path = _utf8(_path) doc = self._element._doc @@ -417,6 +424,8 @@ cdef class XPath(_XPathEvaluatorBase): """ cdef xpath.xmlXPathCompExpr* _xpath cdef bytes _path + def __cinit__(self): + self._xpath = NULL def __init__(self, path, *, namespaces=None, extensions=None, regexp=True, smart_strings=True): @@ -440,6 +449,7 @@ cdef class XPath(_XPathEvaluatorBase): cdef _Document document cdef _Element element + assert self._xpathCtxt is not NULL, "XPath context not initialised" document = _documentOrRaise(_etree_or_element) element = _rootNodeOrRaise(_etree_or_element) diff --git a/src/lxml/xslt.pxi b/src/lxml/xslt.pxi index dab0b568..742fc6a9 100644 --- a/src/lxml/xslt.pxi +++ b/src/lxml/xslt.pxi @@ -124,8 +124,7 @@ cdef void _xslt_store_resolver_exception(char* c_uri, void* context, cdef xmlDoc* _xslt_doc_loader(char* c_uri, tree.xmlDict* c_dict, int parse_options, void* c_ctxt, xslt.xsltLoadType c_type) nogil: - # no Python objects here, may be called without thread context ! - # when we declare a Python object, Pyrex will INCREF(None) ! + # nogil => no Python objects here, may be called without thread context ! cdef xmlDoc* c_doc cdef xmlDoc* result cdef void* c_pcontext @@ -186,7 +185,7 @@ cdef class XSLTAccessControl: See `XSLT`. """ cdef xslt.xsltSecurityPrefs* _prefs - def __init__(self, *, read_file=True, write_file=True, create_dir=True, + def __cinit__(self, *, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True): self._prefs = xslt.xsltNewSecurityPrefs() if self._prefs is NULL: @@ -271,10 +270,12 @@ cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt cdef _ReadOnlyElementProxy _extension_element_proxy cdef dict _extension_elements - def __init__(self, namespaces, extensions, enable_regexp, - build_smart_strings): + def __cinit__(self): self._xsltCtxt = NULL self._extension_elements = EMPTY_DICT + + def __init__(self, namespaces, extensions, enable_regexp, + build_smart_strings): if extensions is not None and extensions: for ns_name_tuple, extension in extensions.items(): if ns_name_tuple[0] is None: @@ -320,7 +321,7 @@ cdef class _XSLTQuotedStringParam: quote escaping. """ cdef bytes strval - def __init__(self, strval): + def __cinit__(self, strval): self.strval = _utf8(strval) @@ -356,6 +357,9 @@ cdef class XSLT: cdef XSLTAccessControl _access_control cdef _ErrorLog _error_log + def __cinit__(self): + self._c_style = NULL + def __init__(self, xslt_input, *, extensions=None, regexp=True, access_control=None): cdef xslt.xsltStylesheet* c_style @@ -413,7 +417,8 @@ cdef class XSLT: self._xslt_resolver_context._c_style_doc is not NULL: tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) # this cleans up the doc copy as well - xslt.xsltFreeStylesheet(self._c_style) + if self._c_style is not NULL: + xslt.xsltFreeStylesheet(self._c_style) property error_log: u"The log of errors and warnings of an XSLT execution." @@ -477,6 +482,7 @@ cdef class XSLT: cdef tree.xmlDict* c_dict cdef char** params + assert self._c_style is not NULL, "XSLT stylesheet not initialised" input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) @@ -645,6 +651,7 @@ cdef extern from "etree_defs.h": cdef XSLT _copyXSLT(XSLT stylesheet): cdef XSLT new_xslt cdef xmlDoc* c_doc + assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised" new_xslt = NEW_XSLT(XSLT) # without calling __init__() new_xslt._access_control = stylesheet._access_control new_xslt._error_log = _ErrorLog() @@ -668,6 +675,11 @@ cdef class _XSLTResultTree(_ElementTree): cdef char* _buffer cdef Py_ssize_t _buffer_len cdef Py_ssize_t _buffer_refcnt + def __cinit__(self): + self._buffer = NULL + self._buffer_len = 0 + self._buffer_refcnt = 0 + cdef _saveToStringAndSize(self, char** s, int* l): cdef _Document doc cdef int r @@ -719,7 +731,7 @@ cdef class _XSLTResultTree(_ElementTree): def __getbuffer__(self, Py_buffer* buffer, int flags): cdef int l if buffer is NULL: - return # LOCK + return if self._buffer is NULL or flags & python.PyBUF_WRITABLE: self._saveToStringAndSize(<char**>&buffer.buf, &l) buffer.len = l @@ -748,7 +760,7 @@ cdef class _XSLTResultTree(_ElementTree): def __releasebuffer__(self, Py_buffer* buffer): if buffer is NULL: - return # UNLOCK + return if <char*>buffer.buf is self._buffer: self._buffer_refcnt -= 1 if self._buffer_refcnt == 0: @@ -778,9 +790,6 @@ cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile): result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree) result._xslt = xslt result._profile = profile - result._buffer = NULL - result._buffer_refcnt = 0 - result._buffer_len = 0 return result # functions like "output" and "write" are a potential security risk, but we @@ -831,6 +840,7 @@ cdef class _XSLTProcessingInstruction(PIBase): cdef _Element result_node cdef char* c_href cdef xmlAttr* c_attr + _assertValidNode(self) if self._c_node.content is NULL: raise ValueError, u"PI lacks content" hrefs = _FIND_PI_HREF(u' ' + funicode(self._c_node.content)) @@ -852,6 +862,7 @@ cdef class _XSLTProcessingInstruction(PIBase): # ID reference to embedded stylesheet # try XML:ID lookup + _assertValidDoc(self._doc) c_href += 1 # skip leading '#' c_attr = tree.xmlGetID(self._c_node.doc, c_href) if c_attr is not NULL and c_attr.doc is self._c_node.doc: diff --git a/src/lxml/xsltext.pxi b/src/lxml/xsltext.pxi index 436a6b54..207865c9 100644 --- a/src/lxml/xsltext.pxi +++ b/src/lxml/xsltext.pxi @@ -37,6 +37,7 @@ cdef class XSLTExtension: cdef xmlNode* c_parent cdef xmlNode* c_node cdef xmlNode* c_context_node + assert context._xsltCtxt is not NULL, "XSLT context not initialised" c_context_node = _roNodeOf(node) #assert c_context_node.doc is context._xsltContext.node.doc, \ # "switching input documents during transformation is not currently supported" @@ -80,6 +81,7 @@ cdef class XSLTExtension: cdef xmlNode* c_parent cdef xslt.xsltTransformContext* c_ctxt = context._xsltCtxt cdef xmlNode* c_old_output_parent = c_ctxt.insert + assert context._xsltCtxt is not NULL, "XSLT context not initialised" # output_parent node is used for adding results instead of # elements list used in apply_templates, that's easier and allows to |