diff options
Diffstat (limited to 'Modules/_elementtree.c')
-rw-r--r-- | Modules/_elementtree.c | 834 |
1 files changed, 416 insertions, 418 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 788772113c..42634977da 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -48,6 +48,7 @@ /* See http://www.python.org/psf/license for licensing details. */ #include "Python.h" +#include "structmember.h" #define VERSION "1.0.6" @@ -70,7 +71,7 @@ helps if you have lots of leaf nodes with attributes). */ /* Also note that pymalloc always allocates blocks in multiples of - eight bytes. For the current version of cElementTree, this means + eight bytes. For the current C version of ElementTree, this means that the number of children should be an even number, at least on 32-bit platforms. */ @@ -94,25 +95,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0) #define LOCAL(type) static type #endif -/* compatibility macros */ -#if (PY_VERSION_HEX < 0x02060000) -#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif - -#if (PY_VERSION_HEX < 0x02050000) -typedef int Py_ssize_t; -#define lenfunc inquiry -#endif - -#if (PY_VERSION_HEX < 0x02040000) -#define PyDict_CheckExact PyDict_Check - -#if !defined(Py_RETURN_NONE) -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None -#endif -#endif - /* macros used to store 'join' flags in string object pointers. note that all use of text and tail as object pointers must be wrapped in JOIN_OBJ. see comments in the ElementObject definition for more @@ -123,7 +105,6 @@ typedef int Py_ssize_t; /* glue functions (see the init function for details) */ static PyObject* elementtree_parseerror_obj; -static PyObject* elementtree_copyelement_obj; static PyObject* elementtree_deepcopy_obj; static PyObject* elementtree_iter_obj; static PyObject* elementtree_itertext_obj; @@ -211,7 +192,7 @@ list_join(PyObject* list) } /* -------------------------------------------------------------------- */ -/* the element type */ +/* the Element type */ typedef struct { @@ -226,7 +207,7 @@ typedef struct { PyObject* *children; PyObject* _children[STATIC_CHILDREN]; - + } ElementObjectExtra; typedef struct { @@ -249,6 +230,8 @@ typedef struct { ElementObjectExtra* extra; + PyObject *weakreflist; /* For tp_weaklistoffset */ + } ElementObject; static PyTypeObject Element_Type; @@ -256,10 +239,10 @@ static PyTypeObject Element_Type; #define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type) /* -------------------------------------------------------------------- */ -/* element constructor and destructor */ +/* Element constructors and destructor */ LOCAL(int) -element_new_extra(ElementObject* self, PyObject* attrib) +create_extra(ElementObject* self, PyObject* attrib) { self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); if (!self->extra) @@ -279,27 +262,39 @@ element_new_extra(ElementObject* self, PyObject* attrib) } LOCAL(void) -element_dealloc_extra(ElementObject* self) +dealloc_extra(ElementObject* self) { + ElementObjectExtra *myextra; int i; - Py_DECREF(self->extra->attrib); + if (!self->extra) + return; + + /* Avoid DECREFs calling into this code again (cycles, etc.) + */ + myextra = self->extra; + self->extra = NULL; + + Py_DECREF(myextra->attrib); - for (i = 0; i < self->extra->length; i++) - Py_DECREF(self->extra->children[i]); + for (i = 0; i < myextra->length; i++) + Py_DECREF(myextra->children[i]); - if (self->extra->children != self->extra->_children) - PyObject_Free(self->extra->children); + if (myextra->children != myextra->_children) + PyObject_Free(myextra->children); - PyObject_Free(self->extra); + PyObject_Free(myextra); } +/* Convenience internal function to create new Element objects with the given + * tag and attributes. +*/ LOCAL(PyObject*) -element_new(PyObject* tag, PyObject* attrib) +create_new_element(PyObject* tag, PyObject* attrib) { ElementObject* self; - self = PyObject_New(ElementObject, &Element_Type); + self = PyObject_GC_New(ElementObject, &Element_Type); if (self == NULL) return NULL; @@ -310,16 +305,10 @@ element_new(PyObject* tag, PyObject* attrib) self->extra = NULL; if (attrib != Py_None) { - - if (element_new_extra(self, attrib) < 0) { + if (create_extra(self, attrib) < 0) { PyObject_Del(self); return NULL; } - - self->extra->length = 0; - self->extra->allocated = STATIC_CHILDREN; - self->extra->children = self->extra->_children; - } Py_INCREF(tag); @@ -331,11 +320,94 @@ element_new(PyObject* tag, PyObject* attrib) Py_INCREF(Py_None); self->tail = Py_None; - ALLOC(sizeof(ElementObject), "create element"); + self->weakreflist = NULL; + ALLOC(sizeof(ElementObject), "create element"); + PyObject_GC_Track(self); return (PyObject*) self; } +static PyObject * +element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + ElementObject *e = (ElementObject *)type->tp_alloc(type, 0); + if (e != NULL) { + Py_INCREF(Py_None); + e->tag = Py_None; + + Py_INCREF(Py_None); + e->text = Py_None; + + Py_INCREF(Py_None); + e->tail = Py_None; + + e->extra = NULL; + e->weakreflist = NULL; + } + return (PyObject *)e; +} + +static int +element_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *tag; + PyObject *tmp; + PyObject *attrib = NULL; + ElementObject *self_elem; + + if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) + return -1; + + if (attrib || kwds) { + attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); + if (!attrib) + return -1; + if (kwds) + PyDict_Update(attrib, kwds); + } else { + Py_INCREF(Py_None); + attrib = Py_None; + } + + self_elem = (ElementObject *)self; + + /* Use None for empty dictionaries */ + if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) { + Py_INCREF(Py_None); + attrib = Py_None; + } + + if (attrib != Py_None) { + if (create_extra(self_elem, attrib) < 0) { + PyObject_Del(self_elem); + return -1; + } + } + + /* If create_extra needed attrib, it took a reference to it, so we can + * release ours anyway. + */ + Py_DECREF(attrib); + + /* Replace the objects already pointed to by tag, text and tail. */ + tmp = self_elem->tag; + self_elem->tag = tag; + Py_INCREF(tag); + Py_DECREF(tmp); + + tmp = self_elem->text; + self_elem->text = Py_None; + Py_INCREF(Py_None); + Py_DECREF(JOIN_OBJ(tmp)); + + tmp = self_elem->tail; + self_elem->tail = Py_None; + Py_INCREF(Py_None); + Py_DECREF(JOIN_OBJ(tmp)); + + return 0; +} + LOCAL(int) element_resize(ElementObject* self, int extra) { @@ -346,7 +418,7 @@ element_resize(ElementObject* self, int extra) elements. set an exception and return -1 if allocation failed */ if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); size = self->extra->length + extra; @@ -354,14 +426,14 @@ element_resize(ElementObject* self, int extra) /* use Python 2.4's list growth strategy */ size = (size >> 3) + (size < 9 ? 3 : 6) + size; /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" - * which needs at least 4 bytes. - * Although it's a false alarm always assume at least one child to + * which needs at least 4 bytes. + * Although it's a false alarm always assume at least one child to * be safe. */ size = size ? size : 1; if (self->extra->children != self->extra->_children) { /* Coverity CID #182 size_error: Allocating 1 bytes to pointer - * "children", which needs at least 4 bytes. Although it's a + * "children", which needs at least 4 bytes. Although it's a * false alarm always assume at least one child to be safe. */ children = PyObject_Realloc(self->extra->children, @@ -464,13 +536,15 @@ element_get_tail(ElementObject* self) } static PyObject* -element(PyObject* self, PyObject* args, PyObject* kw) +subelement(PyObject* self, PyObject* args, PyObject* kw) { PyObject* elem; + ElementObject* parent; PyObject* tag; PyObject* attrib = NULL; - if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, + if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", + &Element_Type, &parent, &tag, &PyDict_Type, &attrib)) return NULL; @@ -485,63 +559,77 @@ element(PyObject* self, PyObject* args, PyObject* kw) attrib = Py_None; } - elem = element_new(tag, attrib); + elem = create_new_element(tag, attrib); Py_DECREF(attrib); + if (element_add_subelement(parent, elem) < 0) { + Py_DECREF(elem); + return NULL; + } + return elem; } -static PyObject* -subelement(PyObject* self, PyObject* args, PyObject* kw) +static int +element_gc_traverse(ElementObject *self, visitproc visit, void *arg) { - PyObject* elem; + Py_VISIT(self->tag); + Py_VISIT(JOIN_OBJ(self->text)); + Py_VISIT(JOIN_OBJ(self->tail)); - ElementObject* parent; - PyObject* tag; - PyObject* attrib = NULL; - if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", - &Element_Type, &parent, &tag, - &PyDict_Type, &attrib)) - return NULL; + if (self->extra) { + int i; + Py_VISIT(self->extra->attrib); - if (attrib || kw) { - attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); - if (!attrib) - return NULL; - if (kw) - PyDict_Update(attrib, kw); - } else { - Py_INCREF(Py_None); - attrib = Py_None; + for (i = 0; i < self->extra->length; ++i) + Py_VISIT(self->extra->children[i]); } + return 0; +} - elem = element_new(tag, attrib); +static int +element_gc_clear(ElementObject *self) +{ + Py_CLEAR(self->tag); - Py_DECREF(attrib); + /* The following is like Py_CLEAR for self->text and self->tail, but + * written explicitily because the real pointers hide behind access + * macros. + */ + if (self->text) { + PyObject *tmp = JOIN_OBJ(self->text); + self->text = NULL; + Py_DECREF(tmp); + } - if (element_add_subelement(parent, elem) < 0) { - Py_DECREF(elem); - return NULL; + if (self->tail) { + PyObject *tmp = JOIN_OBJ(self->tail); + self->tail = NULL; + Py_DECREF(tmp); } - return elem; + /* After dropping all references from extra, it's no longer valid anyway, + * so fully deallocate it. + */ + dealloc_extra(self); + return 0; } static void element_dealloc(ElementObject* self) { - if (self->extra) - element_dealloc_extra(self); + PyObject_GC_UnTrack(self); - /* discard attributes */ - Py_DECREF(self->tag); - Py_DECREF(JOIN_OBJ(self->text)); - Py_DECREF(JOIN_OBJ(self->tail)); + if (self->weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) self); - RELEASE(sizeof(ElementObject), "destroy element"); + /* element_gc_clear clears all references and deallocates extra + */ + element_gc_clear(self); - PyObject_Del(self); + RELEASE(sizeof(ElementObject), "destroy element"); + Py_TYPE(self)->tp_free((PyObject *)self); } /* -------------------------------------------------------------------- */ @@ -561,15 +649,12 @@ element_append(ElementObject* self, PyObject* args) } static PyObject* -element_clear(ElementObject* self, PyObject* args) +element_clearmethod(ElementObject* self, PyObject* args) { if (!PyArg_ParseTuple(args, ":clear")) return NULL; - if (self->extra) { - element_dealloc_extra(self); - self->extra = NULL; - } + dealloc_extra(self); Py_INCREF(Py_None); Py_DECREF(JOIN_OBJ(self->text)); @@ -591,7 +676,7 @@ element_copy(ElementObject* self, PyObject* args) if (!PyArg_ParseTuple(args, ":__copy__")) return NULL; - element = (ElementObject*) element_new( + element = (ElementObject*) create_new_element( self->tag, (self->extra) ? self->extra->attrib : Py_None ); if (!element) @@ -606,7 +691,7 @@ element_copy(ElementObject* self, PyObject* args) Py_INCREF(JOIN_OBJ(element->tail)); if (self->extra) { - + if (element_resize(element, self->extra->length) < 0) { Py_DECREF(element); return NULL; @@ -618,7 +703,7 @@ element_copy(ElementObject* self, PyObject* args) } element->extra->length = self->extra->length; - + } return (PyObject*) element; @@ -654,14 +739,14 @@ element_deepcopy(ElementObject* self, PyObject* args) attrib = Py_None; } - element = (ElementObject*) element_new(tag, attrib); + element = (ElementObject*) create_new_element(tag, attrib); Py_DECREF(tag); Py_DECREF(attrib); if (!element) return NULL; - + text = deepcopy(JOIN_OBJ(self->text), memo); if (!text) goto error; @@ -675,7 +760,7 @@ element_deepcopy(ElementObject* self, PyObject* args) element->tail = JOIN_SET(tail, JOIN_GET(self->tail)); if (self->extra) { - + if (element_resize(element, self->extra->length) < 0) goto error; @@ -689,7 +774,7 @@ element_deepcopy(ElementObject* self, PyObject* args) } element->extra->length = self->extra->length; - + } /* add object to memo dictionary (so deepcopy won't visit it again) */ @@ -723,13 +808,16 @@ checkpath(PyObject* tag) (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') if (PyUnicode_Check(tag)) { - Py_UNICODE *p = PyUnicode_AS_UNICODE(tag); - for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) { - if (p[i] == '{') + const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); + void *data = PyUnicode_DATA(tag); + unsigned int kind = PyUnicode_KIND(tag); + for (i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch == '{') check = 0; - else if (p[i] == '}') + else if (ch == '}') check = 1; - else if (check && PATHCHAR(p[i])) + else if (check && PATHCHAR(ch)) return 1; } return 0; @@ -772,6 +860,15 @@ element_extend(ElementObject* self, PyObject* args) seqlen = PySequence_Size(seq); for (i = 0; i < seqlen; i++) { PyObject* element = PySequence_Fast_GET_ITEM(seq, i); + if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) { + Py_DECREF(seq); + PyErr_Format( + PyExc_TypeError, + "expected an Element, not \"%.200s\"", + Py_TYPE(element)->tp_name); + return NULL; + } + if (element_add_subelement(self, element) < 0) { Py_DECREF(seq); return NULL; @@ -787,20 +884,22 @@ static PyObject* element_find(ElementObject* self, PyObject* args) { int i; - PyObject* tag; PyObject* namespaces = Py_None; + if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) return NULL; - if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "find", "OOO", self, tag, namespaces + if (checkpath(tag) || namespaces != Py_None) { + _Py_IDENTIFIER(find); + return _PyObject_CallMethodId( + elementpath_obj, &PyId_find, "OOO", self, tag, namespaces ); + } if (!self->extra) Py_RETURN_NONE; - + for (i = 0; i < self->extra->length; i++) { PyObject* item = self->extra->children[i]; if (Element_CheckExact(item) && @@ -817,16 +916,17 @@ static PyObject* element_findtext(ElementObject* self, PyObject* args) { int i; - PyObject* tag; PyObject* default_value = Py_None; PyObject* namespaces = Py_None; + _Py_IDENTIFIER(findtext); + if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) return NULL; if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces + return _PyObject_CallMethodId( + elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces ); if (!self->extra) { @@ -855,16 +955,18 @@ element_findall(ElementObject* self, PyObject* args) { int i; PyObject* out; - PyObject* tag; PyObject* namespaces = Py_None; + if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) return NULL; - if (checkpath(tag) || namespaces != Py_None) - return PyObject_CallMethod( - elementpath_obj, "findall", "OOO", self, tag, namespaces + if (checkpath(tag) || namespaces != Py_None) { + _Py_IDENTIFIER(findall); + return _PyObject_CallMethodId( + elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces ); + } out = PyList_New(0); if (!out) @@ -892,11 +994,13 @@ element_iterfind(ElementObject* self, PyObject* args) { PyObject* tag; PyObject* namespaces = Py_None; + _Py_IDENTIFIER(iterfind); + if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) return NULL; - return PyObject_CallMethod( - elementpath_obj, "iterfind", "OOO", self, tag, namespaces + return _PyObject_CallMethodId( + elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces ); } @@ -953,7 +1057,7 @@ static PyObject* element_iter(ElementObject* self, PyObject* args) { PyObject* result; - + PyObject* tag = Py_None; if (!PyArg_ParseTuple(args, "|O:iter", &tag)) return NULL; @@ -985,7 +1089,7 @@ static PyObject* element_itertext(ElementObject* self, PyObject* args) { PyObject* result; - + if (!PyArg_ParseTuple(args, ":itertext")) return NULL; @@ -1039,7 +1143,7 @@ element_insert(ElementObject* self, PyObject* args) return NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); if (index < 0) { index += self->extra->length; @@ -1110,7 +1214,7 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw) if (!attrib) return NULL; - elem = element_new(tag, attrib); + elem = create_new_element(tag, attrib); Py_DECREF(attrib); @@ -1118,31 +1222,6 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw) } static PyObject* -element_reduce(ElementObject* self, PyObject* args) -{ - if (!PyArg_ParseTuple(args, ":__reduce__")) - return NULL; - - /* Hack alert: This method is used to work around a __copy__ - problem on certain 2.3 and 2.4 versions. To save time and - simplify the code, we create the copy in here, and use a dummy - copyelement helper to trick the copy module into doing the - right thing. */ - - if (!elementtree_copyelement_obj) { - PyErr_SetString( - PyExc_RuntimeError, - "copyelement helper not found" - ); - return NULL; - } - - return Py_BuildValue( - "O(N)", elementtree_copyelement_obj, element_copy(self, args) - ); -} - -static PyObject* element_remove(ElementObject* self, PyObject* args) { int i; @@ -1189,7 +1268,10 @@ element_remove(ElementObject* self, PyObject* args) static PyObject* element_repr(ElementObject* self) { - return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); + if (self->tag) + return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); + else + return PyUnicode_FromFormat("<Element at %p>", self); } static PyObject* @@ -1203,7 +1285,7 @@ element_set(ElementObject* self, PyObject* args) return NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); attrib = element_get_attrib(self); if (!attrib) @@ -1250,13 +1332,8 @@ element_subscr(PyObject* self_, PyObject* item) { ElementObject* self = (ElementObject*) self_; -#if (PY_VERSION_HEX < 0x02050000) - if (PyInt_Check(item) || PyLong_Check(item)) { - long i = PyInt_AsLong(item); -#else if (PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); -#endif if (i == -1 && PyErr_Occurred()) { return NULL; @@ -1307,13 +1384,8 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) { ElementObject* self = (ElementObject*) self_; -#if (PY_VERSION_HEX < 0x02050000) - if (PyInt_Check(item) || PyLong_Check(item)) { - long i = PyInt_AsLong(item); -#else if (PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); -#endif if (i == -1 && PyErr_Occurred()) { return -1; @@ -1329,7 +1401,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) PyObject* seq = NULL; if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); if (PySlice_GetIndicesEx(item, self->extra->length, @@ -1337,9 +1409,74 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) return -1; } - if (value == NULL) - newlen = 0; + if (value == NULL) { + /* Delete slice */ + size_t cur; + Py_ssize_t i; + + if (slicelen <= 0) + return 0; + + /* Since we're deleting, the direction of the range doesn't matter, + * so for simplicity make it always ascending. + */ + if (step < 0) { + stop = start + 1; + start = stop + step * (slicelen - 1) - 1; + step = -step; + } + + assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *)); + + /* recycle is a list that will contain all the children + * scheduled for removal. + */ + if (!(recycle = PyList_New(slicelen))) { + PyErr_NoMemory(); + return -1; + } + + /* This loop walks over all the children that have to be deleted, + * with cur pointing at them. num_moved is the amount of children + * until the next deleted child that have to be "shifted down" to + * occupy the deleted's places. + * Note that in the ith iteration, shifting is done i+i places down + * because i children were already removed. + */ + for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { + /* Compute how many children have to be moved, clipping at the + * list end. + */ + Py_ssize_t num_moved = step - 1; + if (cur + step >= (size_t)self->extra->length) { + num_moved = self->extra->length - cur - 1; + } + + PyList_SET_ITEM(recycle, i, self->extra->children[cur]); + + memmove( + self->extra->children + cur - i, + self->extra->children + cur + 1, + num_moved * sizeof(PyObject *)); + } + + /* Leftover "tail" after the last removed child */ + cur = start + (size_t)slicelen * step; + if (cur < (size_t)self->extra->length) { + memmove( + self->extra->children + cur - slicelen, + self->extra->children + cur, + (self->extra->length - cur) * sizeof(PyObject *)); + } + + self->extra->length -= slicelen; + + /* Discard the recycle list with all the deleted sub-elements */ + Py_XDECREF(recycle); + return 0; + } else { + /* A new slice is actually being assigned */ seq = PySequence_Fast(value, ""); if (!seq) { PyErr_Format( @@ -1354,19 +1491,13 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) if (step != 1 && newlen != slicelen) { PyErr_Format(PyExc_ValueError, -#if (PY_VERSION_HEX < 0x02050000) - "attempt to assign sequence of size %d " - "to extended slice of size %d", -#else "attempt to assign sequence of size %zd " "to extended slice of size %zd", -#endif newlen, slicelen ); return -1; } - /* Resize before creating the recycle bin, to prevent refleaks. */ if (newlen > slicelen) { if (element_resize(self, newlen - slicelen) < 0) { @@ -1431,7 +1562,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) static PyMethodDef element_methods[] = { - {"clear", (PyCFunction) element_clear, METH_VARARGS}, + {"clear", (PyCFunction) element_clearmethod, METH_VARARGS}, {"get", (PyCFunction) element_get, METH_VARARGS}, {"set", (PyCFunction) element_set, METH_VARARGS}, @@ -1460,18 +1591,6 @@ static PyMethodDef element_methods[] = { {"__copy__", (PyCFunction) element_copy, METH_VARARGS}, {"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS}, - /* Some 2.3 and 2.4 versions do not handle the __copy__ method on - C objects correctly, so we have to fake it using a __reduce__- - based hack (see the element_reduce implementation above for - details). */ - - /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're - using a runtime test to figure out if we need to fake things - or now (see the init code below). The following entry is - enabled only if the hack is needed. */ - - {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS}, - {NULL, NULL} }; @@ -1483,7 +1602,7 @@ element_getattro(ElementObject* self, PyObject* nameobj) if (PyUnicode_Check(nameobj)) name = _PyUnicode_AsString(nameobj); - + if (name == NULL) return NULL; @@ -1510,7 +1629,7 @@ element_getattro(ElementObject* self, PyObject* nameobj) } else if (strcmp(name, "attrib") == 0) { PyErr_Clear(); if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); res = element_get_attrib(self); } @@ -1546,7 +1665,7 @@ element_setattr(ElementObject* self, const char* name, PyObject* value) Py_INCREF(self->tail); } else if (strcmp(name, "attrib") == 0) { if (!self->extra) - element_new_extra(self, NULL); + create_extra(self, NULL); Py_DECREF(self->extra->attrib); self->extra->attrib = value; Py_INCREF(self->extra->attrib); @@ -1578,31 +1697,42 @@ static PyTypeObject Element_Type = { PyVarObject_HEAD_INIT(NULL, 0) "Element", sizeof(ElementObject), 0, /* methods */ - (destructor)element_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - (setattrfunc)element_setattr, /* tp_setattr */ - 0, /* tp_reserved */ - (reprfunc)element_repr, /* tp_repr */ - 0, /* tp_as_number */ - &element_as_sequence, /* tp_as_sequence */ - &element_as_mapping, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - (getattrofunc)element_getattro, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - element_methods, /* tp_methods */ - 0, /* tp_members */ + (destructor)element_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + (setattrfunc)element_setattr, /* tp_setattr */ + 0, /* tp_reserved */ + (reprfunc)element_repr, /* tp_repr */ + 0, /* tp_as_number */ + &element_as_sequence, /* tp_as_sequence */ + &element_as_mapping, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + (getattrofunc)element_getattro, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + (traverseproc)element_gc_traverse, /* tp_traverse */ + (inquiry)element_gc_clear, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + element_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)element_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + element_new, /* tp_new */ + 0, /* tp_free */ }; /* ==================================================================== */ @@ -1700,13 +1830,6 @@ treebuilder_dealloc(TreeBuilderObject* self) /* handlers */ LOCAL(PyObject*) -treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding, - PyObject* standalone) -{ - Py_RETURN_NONE; -} - -LOCAL(PyObject*) treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, PyObject* attrib) { @@ -1728,7 +1851,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, self->data = NULL; } - node = element_new(tag, attrib); + node = create_new_element(tag, attrib); if (!node) return NULL; @@ -1977,22 +2100,10 @@ treebuilder_start(TreeBuilderObject* self, PyObject* args) return treebuilder_handle_start(self, tag, attrib); } -static PyObject* -treebuilder_xml(TreeBuilderObject* self, PyObject* args) -{ - PyObject* encoding; - PyObject* standalone; - if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone)) - return NULL; - - return treebuilder_handle_xml(self, encoding, standalone); -} - static PyMethodDef treebuilder_methods[] = { {"data", (PyCFunction) treebuilder_data, METH_VARARGS}, {"start", (PyCFunction) treebuilder_start, METH_VARARGS}, {"end", (PyCFunction) treebuilder_end, METH_VARARGS}, - {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS}, {"close", (PyCFunction) treebuilder_close, METH_VARARGS}, {NULL, NULL} }; @@ -2053,8 +2164,6 @@ typedef struct { PyObject* names; - PyObject* handle_xml; - PyObject* handle_start; PyObject* handle_data; PyObject* handle_end; @@ -2113,7 +2222,7 @@ makeuniversal(XMLParserObject* self, const char* string) Py_INCREF(key); tag = key; } - + /* decode universal name */ p = PyBytes_AS_STRING(tag); value = PyUnicode_DecodeUTF8(p, size, "strict"); @@ -2135,20 +2244,39 @@ makeuniversal(XMLParserObject* self, const char* string) return value; } +/* Set the ParseError exception with the given parameters. + * If message is not NULL, it's used as the error string. Otherwise, the + * message string is the default for the given error_code. +*/ static void -expat_set_error(const char* message, int line, int column) +expat_set_error(enum XML_Error error_code, int line, int column, char *message) { - PyObject *error; - PyObject *position; - char buffer[256]; + PyObject *errmsg, *error, *position, *code; - sprintf(buffer, "%.100s: line %d, column %d", message, line, column); + errmsg = PyUnicode_FromFormat("%s: line %d, column %d", + message ? message : EXPAT(ErrorString)(error_code), + line, column); + if (errmsg == NULL) + return; - error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); + error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg); + Py_DECREF(errmsg); if (!error) return; - /* add position attribute */ + /* Add code and position attributes */ + code = PyLong_FromLong((long)error_code); + if (!code) { + Py_DECREF(error); + return; + } + if (PyObject_SetAttrString(error, "code", code) == -1) { + Py_DECREF(error); + Py_DECREF(code); + return; + } + Py_DECREF(code); + position = Py_BuildValue("(ii)", line, column); if (!position) { Py_DECREF(error); @@ -2200,9 +2328,10 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, char message[128] = "undefined entity "; strncat(message, data_in, data_len < 100?data_len:100); expat_set_error( - message, + XML_ERROR_UNDEFINED_ENTITY, EXPAT(GetErrorLineNumber)(self->parser), - EXPAT(GetErrorColumnNumber)(self->parser) + EXPAT(GetErrorColumnNumber)(self->parser), + message ); } @@ -2399,29 +2528,33 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name, XML_Encoding *info) { PyObject* u; - Py_UNICODE* p; unsigned char s[256]; int i; + void *data; + unsigned int kind; memset(info, 0, sizeof(XML_Encoding)); for (i = 0; i < 256; i++) s[i] = i; - + u = PyUnicode_Decode((char*) s, 256, name, "replace"); if (!u) return XML_STATUS_ERROR; + if (PyUnicode_READY(u)) + return XML_STATUS_ERROR; - if (PyUnicode_GET_SIZE(u) != 256) { + if (PyUnicode_GET_LENGTH(u) != 256) { Py_DECREF(u); return XML_STATUS_ERROR; } - p = PyUnicode_AS_UNICODE(u); - + kind = PyUnicode_KIND(u); + data = PyUnicode_DATA(u); for (i = 0; i < 256; i++) { - if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER) - info->map[i] = p[i]; + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (ch != Py_UNICODE_REPLACEMENT_CHARACTER) + info->map[i] = ch; else info->map[i] = -1; } @@ -2466,7 +2599,7 @@ xmlparser(PyObject* self_, PyObject* args, PyObject* kw) PyObject_Del(self); return NULL; } - + self->names = PyDict_New(); if (!self->names) { PyObject_Del(self->entity); @@ -2501,7 +2634,6 @@ xmlparser(PyObject* self_, PyObject* args, PyObject* kw) Py_INCREF(target); self->target = target; - self->handle_xml = PyObject_GetAttrString(target, "xml"); self->handle_start = PyObject_GetAttrString(target, "start"); self->handle_data = PyObject_GetAttrString(target, "data"); self->handle_end = PyObject_GetAttrString(target, "end"); @@ -2557,7 +2689,6 @@ xmlparser_dealloc(XMLParserObject* self) Py_XDECREF(self->handle_end); Py_XDECREF(self->handle_data); Py_XDECREF(self->handle_start); - Py_XDECREF(self->handle_xml); Py_DECREF(self->target); Py_DECREF(self->entity); @@ -2583,9 +2714,10 @@ expat_parse(XMLParserObject* self, char* data, int data_len, int final) if (!ok) { expat_set_error( - EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)), + EXPAT(GetErrorCode)(self->parser), EXPAT(GetErrorLineNumber)(self->parser), - EXPAT(GetErrorColumnNumber)(self->parser) + EXPAT(GetErrorColumnNumber)(self->parser), + NULL ); return NULL; } @@ -2636,6 +2768,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) PyObject* reader; PyObject* buffer; + PyObject* temp; PyObject* res; PyObject* fileobj; @@ -2645,7 +2778,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) reader = PyObject_GetAttrString(fileobj, "read"); if (!reader) return NULL; - + /* read from open file object */ for (;;) { @@ -2657,7 +2790,27 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) return NULL; } - if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { + if (PyUnicode_CheckExact(buffer)) { + /* A unicode object is encoded into bytes using UTF-8 */ + if (PyUnicode_GET_SIZE(buffer) == 0) { + Py_DECREF(buffer); + break; + } + temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); + if (!temp) { + /* Propagate exception from PyUnicode_AsEncodedString */ + Py_DECREF(buffer); + Py_DECREF(reader); + return NULL; + } + + /* Here we no longer need the original buffer since it contains + * unicode. Make it point to the encoded bytes object. + */ + Py_DECREF(buffer); + buffer = temp; + } + else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { Py_DECREF(buffer); break; } @@ -2705,7 +2858,7 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args) if (!TreeBuilder_CheckExact(self->target)) { PyErr_SetString( PyExc_TypeError, - "event handling only supported for cElementTree.Treebuilder " + "event handling only supported for ElementTree.TreeBuilder " "targets" ); return NULL; @@ -2796,7 +2949,7 @@ static PyMethodDef xmlparser_methods[] = { {NULL, NULL} }; -static PyObject* +static PyObject* xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) { if (PyUnicode_Check(nameobj)) { @@ -2857,12 +3010,10 @@ static PyTypeObject XMLParser_Type = { /* python module interface */ static PyMethodDef _functions[] = { - {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS}, {"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS}, {"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS}, #if defined(USE_EXPAT) {"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, - {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS}, #endif {NULL, NULL} }; @@ -2883,8 +3034,7 @@ static struct PyModuleDef _elementtreemodule = { PyMODINIT_FUNC PyInit__elementtree(void) { - PyObject* m; - PyObject* g; + PyObject *m, *g, *temp; char* bootstrap; /* Initialize object types */ @@ -2916,56 +3066,6 @@ PyInit__elementtree(void) PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins()); bootstrap = ( - - "from copy import copy, deepcopy\n" - - "try:\n" - " from xml.etree import ElementTree\n" - "except ImportError:\n" - " import ElementTree\n" - "ET = ElementTree\n" - "del ElementTree\n" - - "import _elementtree as cElementTree\n" - - "try:\n" /* check if copy works as is */ - " copy(cElementTree.Element('x'))\n" - "except:\n" - " def copyelement(elem):\n" - " return elem\n" - - "class CommentProxy:\n" - " def __call__(self, text=None):\n" - " element = cElementTree.Element(ET.Comment)\n" - " element.text = text\n" - " return element\n" - " def __eq__(self, other):\n" - " return ET.Comment == other\n" - "cElementTree.Comment = CommentProxy()\n" - - "class ElementTree(ET.ElementTree):\n" /* public */ - " def parse(self, source, parser=None):\n" - " close_source = False\n" - " if not hasattr(source, 'read'):\n" - " source = open(source, 'rb')\n" - " close_source = True\n" - " try:\n" - " if parser is not None:\n" - " while 1:\n" - " data = source.read(65536)\n" - " if not data:\n" - " break\n" - " parser.feed(data)\n" - " self._root = parser.close()\n" - " else:\n" - " parser = cElementTree.XMLParser()\n" - " self._root = parser._parse(source)\n" - " return self._root\n" - " finally:\n" - " if close_source:\n" - " source.close()\n" - "cElementTree.ElementTree = ElementTree\n" - "def iter(node, tag=None):\n" /* helper */ " if tag == '*':\n" " tag = None\n" @@ -2984,124 +3084,19 @@ PyInit__elementtree(void) " if e.tail:\n" " yield e.tail\n" - "def parse(source, parser=None):\n" /* public */ - " tree = ElementTree()\n" - " tree.parse(source, parser)\n" - " return tree\n" - "cElementTree.parse = parse\n" - - "class iterparse:\n" - " root = None\n" - " def __init__(self, file, events=None):\n" - " self._close_file = False\n" - " if not hasattr(file, 'read'):\n" - " file = open(file, 'rb')\n" - " self._close_file = True\n" - " self._file = file\n" - " self._events = []\n" - " self._index = 0\n" - " self._error = None\n" - " self.root = self._root = None\n" - " b = cElementTree.TreeBuilder()\n" - " self._parser = cElementTree.XMLParser(b)\n" - " self._parser._setevents(self._events, events)\n" - " def __next__(self):\n" - " while 1:\n" - " try:\n" - " item = self._events[self._index]\n" - " self._index += 1\n" - " return item\n" - " except IndexError:\n" - " pass\n" - " if self._error:\n" - " e = self._error\n" - " self._error = None\n" - " raise e\n" - " if self._parser is None:\n" - " self.root = self._root\n" - " if self._close_file:\n" - " self._file.close()\n" - " raise StopIteration\n" - " # load event buffer\n" - " del self._events[:]\n" - " self._index = 0\n" - " data = self._file.read(16384)\n" - " if data:\n" - " try:\n" - " self._parser.feed(data)\n" - " except SyntaxError as exc:\n" - " self._error = exc\n" - " else:\n" - " self._root = self._parser.close()\n" - " self._parser = None\n" - " def __iter__(self):\n" - " return self\n" - "cElementTree.iterparse = iterparse\n" - - "class PIProxy:\n" - " def __call__(self, target, text=None):\n" - " element = cElementTree.Element(ET.PI)\n" - " element.text = target\n" - " if text:\n" - " element.text = element.text + ' ' + text\n" - " return element\n" - " def __eq__(self, other):\n" - " return ET.PI == other\n" - "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n" - - "def XML(text):\n" /* public */ - " parser = cElementTree.XMLParser()\n" - " parser.feed(text)\n" - " return parser.close()\n" - "cElementTree.XML = cElementTree.fromstring = XML\n" - - "def XMLID(text):\n" /* public */ - " tree = XML(text)\n" - " ids = {}\n" - " for elem in tree.iter():\n" - " id = elem.get('id')\n" - " if id:\n" - " ids[id] = elem\n" - " return tree, ids\n" - "cElementTree.XMLID = XMLID\n" - - "try:\n" - " register_namespace = ET.register_namespace\n" - "except AttributeError:\n" - " def register_namespace(prefix, uri):\n" - " ET._namespace_map[uri] = prefix\n" - "cElementTree.register_namespace = register_namespace\n" - - "cElementTree.dump = ET.dump\n" - "cElementTree.ElementPath = ElementPath = ET.ElementPath\n" - "cElementTree.iselement = ET.iselement\n" - "cElementTree.QName = ET.QName\n" - "cElementTree.tostring = ET.tostring\n" - "cElementTree.fromstringlist = ET.fromstringlist\n" - "cElementTree.tostringlist = ET.tostringlist\n" - "cElementTree.VERSION = '" VERSION "'\n" - "cElementTree.__version__ = '" VERSION "'\n" - ); if (!PyRun_String(bootstrap, Py_file_input, g, NULL)) return NULL; - elementpath_obj = PyDict_GetItemString(g, "ElementPath"); + if (!(temp = PyImport_ImportModule("copy"))) + return NULL; + elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); + Py_XDECREF(temp); - elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement"); - if (elementtree_copyelement_obj) { - /* reduce hack needed; enable reduce method */ - PyMethodDef* mp; - for (mp = element_methods; mp->ml_name; mp++) - if (mp->ml_meth == (PyCFunction) element_reduce) { - mp->ml_name = "__reduce__"; - break; - } - } else - PyErr_Clear(); + if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) + return NULL; - elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy"); elementtree_iter_obj = PyDict_GetItemString(g, "iter"); elementtree_itertext_obj = PyDict_GetItemString(g, "itertext"); @@ -3120,10 +3115,13 @@ PyInit__elementtree(void) #endif elementtree_parseerror_obj = PyErr_NewException( - "cElementTree.ParseError", PyExc_SyntaxError, NULL + "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL ); Py_INCREF(elementtree_parseerror_obj); PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj); + Py_INCREF((PyObject *)&Element_Type); + PyModule_AddObject(m, "Element", (PyObject *)&Element_Type); + return m; } |