diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-02-28 20:53:29 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-02-28 20:53:29 +0100 |
commit | f529aeb1bb234cf7dc0cf23e1e7fd98ce4953e85 (patch) | |
tree | 68b16a4f04e39d9b8e173fc35a7fd00400e9e953 | |
parent | f8bb21857f8cfad0c707b6785ae0ec1832011fbf (diff) | |
download | python-lxml-f529aeb1bb234cf7dc0cf23e1e7fd98ce4953e85.tar.gz |
Fix crash due to incorrect dict handling for text nodes.
The C doc link needs to be set after removing the text from the dict and before putting it into the dict. Thus, it is best to separate the adaptations into two traversals again.
-rw-r--r-- | src/lxml/apihelpers.pxi | 4 | ||||
-rw-r--r-- | src/lxml/proxy.pxi | 35 |
2 files changed, 23 insertions, 16 deletions
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi index bccf5fbb..cf932d43 100644 --- a/src/lxml/apihelpers.pxi +++ b/src/lxml/apihelpers.pxi @@ -1269,7 +1269,7 @@ cdef int _replaceSlice(_Element parent, xmlNode* c_node, cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1: - """Simple version of 'xmlAddChild()' that does not deep-fix the document links. + """Adaptation of 'xmlAddChild()' that deep-fix the document links iteratively. """ assert _isElement(c_node) c_node.parent = c_parent @@ -1279,6 +1279,8 @@ cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1: c_node.prev = c_parent.last c_parent.last.next = c_node c_parent.last = c_node + + _setTreeDoc(c_node, c_parent.doc) return 0 diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi index 2f8e76c5..ff277c53 100644 --- a/src/lxml/proxy.pxi +++ b/src/lxml/proxy.pxi @@ -332,16 +332,11 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, cdef proxy_count = 0 if not tree._isElementOrXInclude(c_element): - c_element.doc = c_doc - _fixDocChildren(c_element.children, c_doc) return 0 c_start_node = c_element tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) - # 0) set C doc link - c_element.doc = c_doc - if tree._isElementOrXInclude(c_element): if hasProxy(c_element): proxy_count += 1 @@ -352,7 +347,6 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, try: _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list) except: - _fixDocChildren(c_start_node.children, c_doc) _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list) raise @@ -365,14 +359,6 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, while c_node is not NULL: if c_node.ns is not NULL: _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list) - - # remove attribute from ID table (see xmlSetTreeDoc() in libxml2's tree.c) - c_attr = <tree.xmlAttr*>c_node - if c_attr.atype == tree.XML_ATTRIBUTE_ID: - tree.xmlRemoveID(c_source_doc, 
c_attr) - # set C doc link also for attributes - c_node.doc = c_doc - _fixDocChildren(c_node.children, c_doc) c_node = c_node.next tree.END_FOR_EACH_FROM(c_element) @@ -405,10 +391,29 @@ cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, return 0 +cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc): + """Adaptation of 'xmlSetTreeDoc()' that deep-fix the document links iteratively. + It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42 + """ + tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + c_attr = <tree.xmlAttr*>c_node.properties + while c_attr: + if c_attr.atype == tree.XML_ATTRIBUTE_ID: + tree.xmlRemoveID(c_node.doc, c_attr) + c_attr.doc = c_doc + _fixDocChildren(c_attr.children, c_doc) + c_attr = c_attr.next + # Set doc link for all nodes, not only elements. + c_node.doc = c_doc + tree.END_FOR_EACH_FROM(c_node) + + cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc): while c_child: c_child.doc = c_doc - _fixDocChildren(c_child.children, c_doc) + if c_child.children: + _fixDocChildren(c_child.children, c_doc) c_child = c_child.next |