summaryrefslogtreecommitdiff
path: root/Modules/_elementtree.c
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_elementtree.c')
-rw-r--r--Modules/_elementtree.c834
1 files changed, 416 insertions, 418 deletions
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index 788772113c..42634977da 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -48,6 +48,7 @@
/* See http://www.python.org/psf/license for licensing details. */
#include "Python.h"
+#include "structmember.h"
#define VERSION "1.0.6"
@@ -70,7 +71,7 @@
helps if you have lots of leaf nodes with attributes). */
/* Also note that pymalloc always allocates blocks in multiples of
- eight bytes. For the current version of cElementTree, this means
+ eight bytes. For the current C version of ElementTree, this means
that the number of children should be an even number, at least on
32-bit platforms. */
@@ -94,25 +95,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
#define LOCAL(type) static type
#endif
-/* compatibility macros */
-#if (PY_VERSION_HEX < 0x02060000)
-#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
-#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
-#endif
-
-#if (PY_VERSION_HEX < 0x02050000)
-typedef int Py_ssize_t;
-#define lenfunc inquiry
-#endif
-
-#if (PY_VERSION_HEX < 0x02040000)
-#define PyDict_CheckExact PyDict_Check
-
-#if !defined(Py_RETURN_NONE)
-#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None
-#endif
-#endif
-
/* macros used to store 'join' flags in string object pointers. note
that all use of text and tail as object pointers must be wrapped in
JOIN_OBJ. see comments in the ElementObject definition for more
@@ -123,7 +105,6 @@ typedef int Py_ssize_t;
/* glue functions (see the init function for details) */
static PyObject* elementtree_parseerror_obj;
-static PyObject* elementtree_copyelement_obj;
static PyObject* elementtree_deepcopy_obj;
static PyObject* elementtree_iter_obj;
static PyObject* elementtree_itertext_obj;
@@ -211,7 +192,7 @@ list_join(PyObject* list)
}
/* -------------------------------------------------------------------- */
-/* the element type */
+/* the Element type */
typedef struct {
@@ -226,7 +207,7 @@ typedef struct {
PyObject* *children;
PyObject* _children[STATIC_CHILDREN];
-
+
} ElementObjectExtra;
typedef struct {
@@ -249,6 +230,8 @@ typedef struct {
ElementObjectExtra* extra;
+ PyObject *weakreflist; /* For tp_weaklistoffset */
+
} ElementObject;
static PyTypeObject Element_Type;
@@ -256,10 +239,10 @@ static PyTypeObject Element_Type;
#define Element_CheckExact(op) (Py_TYPE(op) == &Element_Type)
/* -------------------------------------------------------------------- */
-/* element constructor and destructor */
+/* Element constructors and destructor */
LOCAL(int)
-element_new_extra(ElementObject* self, PyObject* attrib)
+create_extra(ElementObject* self, PyObject* attrib)
{
self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
if (!self->extra)
@@ -279,27 +262,39 @@ element_new_extra(ElementObject* self, PyObject* attrib)
}
LOCAL(void)
-element_dealloc_extra(ElementObject* self)
+dealloc_extra(ElementObject* self)
{
+ ElementObjectExtra *myextra;
int i;
- Py_DECREF(self->extra->attrib);
+ if (!self->extra)
+ return;
+
+ /* Avoid DECREFs calling into this code again (cycles, etc.)
+ */
+ myextra = self->extra;
+ self->extra = NULL;
+
+ Py_DECREF(myextra->attrib);
- for (i = 0; i < self->extra->length; i++)
- Py_DECREF(self->extra->children[i]);
+ for (i = 0; i < myextra->length; i++)
+ Py_DECREF(myextra->children[i]);
- if (self->extra->children != self->extra->_children)
- PyObject_Free(self->extra->children);
+ if (myextra->children != myextra->_children)
+ PyObject_Free(myextra->children);
- PyObject_Free(self->extra);
+ PyObject_Free(myextra);
}
+/* Convenience internal function to create new Element objects with the given
+ * tag and attributes.
+*/
LOCAL(PyObject*)
-element_new(PyObject* tag, PyObject* attrib)
+create_new_element(PyObject* tag, PyObject* attrib)
{
ElementObject* self;
- self = PyObject_New(ElementObject, &Element_Type);
+ self = PyObject_GC_New(ElementObject, &Element_Type);
if (self == NULL)
return NULL;
@@ -310,16 +305,10 @@ element_new(PyObject* tag, PyObject* attrib)
self->extra = NULL;
if (attrib != Py_None) {
-
- if (element_new_extra(self, attrib) < 0) {
+ if (create_extra(self, attrib) < 0) {
PyObject_Del(self);
return NULL;
}
-
- self->extra->length = 0;
- self->extra->allocated = STATIC_CHILDREN;
- self->extra->children = self->extra->_children;
-
}
Py_INCREF(tag);
@@ -331,11 +320,94 @@ element_new(PyObject* tag, PyObject* attrib)
Py_INCREF(Py_None);
self->tail = Py_None;
- ALLOC(sizeof(ElementObject), "create element");
+ self->weakreflist = NULL;
+ ALLOC(sizeof(ElementObject), "create element");
+ PyObject_GC_Track(self);
return (PyObject*) self;
}
+static PyObject *
+element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+ ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
+ if (e != NULL) {
+ Py_INCREF(Py_None);
+ e->tag = Py_None;
+
+ Py_INCREF(Py_None);
+ e->text = Py_None;
+
+ Py_INCREF(Py_None);
+ e->tail = Py_None;
+
+ e->extra = NULL;
+ e->weakreflist = NULL;
+ }
+ return (PyObject *)e;
+}
+
+static int
+element_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ PyObject *tag;
+ PyObject *tmp;
+ PyObject *attrib = NULL;
+ ElementObject *self_elem;
+
+ if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
+ return -1;
+
+ if (attrib || kwds) {
+ attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
+ if (!attrib)
+ return -1;
+ if (kwds)
+ PyDict_Update(attrib, kwds);
+ } else {
+ Py_INCREF(Py_None);
+ attrib = Py_None;
+ }
+
+ self_elem = (ElementObject *)self;
+
+ /* Use None for empty dictionaries */
+ if (PyDict_CheckExact(attrib) && PyDict_Size(attrib) == 0) {
+ Py_INCREF(Py_None);
+ attrib = Py_None;
+ }
+
+ if (attrib != Py_None) {
+ if (create_extra(self_elem, attrib) < 0) {
+ PyObject_Del(self_elem);
+ return -1;
+ }
+ }
+
+ /* If create_extra needed attrib, it took a reference to it, so we can
+ * release ours anyway.
+ */
+ Py_DECREF(attrib);
+
+ /* Replace the objects already pointed to by tag, text and tail. */
+ tmp = self_elem->tag;
+ self_elem->tag = tag;
+ Py_INCREF(tag);
+ Py_DECREF(tmp);
+
+ tmp = self_elem->text;
+ self_elem->text = Py_None;
+ Py_INCREF(Py_None);
+ Py_DECREF(JOIN_OBJ(tmp));
+
+ tmp = self_elem->tail;
+ self_elem->tail = Py_None;
+ Py_INCREF(Py_None);
+ Py_DECREF(JOIN_OBJ(tmp));
+
+ return 0;
+}
+
LOCAL(int)
element_resize(ElementObject* self, int extra)
{
@@ -346,7 +418,7 @@ element_resize(ElementObject* self, int extra)
elements. set an exception and return -1 if allocation failed */
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
size = self->extra->length + extra;
@@ -354,14 +426,14 @@ element_resize(ElementObject* self, int extra)
/* use Python 2.4's list growth strategy */
size = (size >> 3) + (size < 9 ? 3 : 6) + size;
/* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
- * which needs at least 4 bytes.
- * Although it's a false alarm always assume at least one child to
+ * which needs at least 4 bytes.
+ * Although it's a false alarm always assume at least one child to
* be safe.
*/
size = size ? size : 1;
if (self->extra->children != self->extra->_children) {
/* Coverity CID #182 size_error: Allocating 1 bytes to pointer
- * "children", which needs at least 4 bytes. Although it's a
+ * "children", which needs at least 4 bytes. Although it's a
* false alarm always assume at least one child to be safe.
*/
children = PyObject_Realloc(self->extra->children,
@@ -464,13 +536,15 @@ element_get_tail(ElementObject* self)
}
static PyObject*
-element(PyObject* self, PyObject* args, PyObject* kw)
+subelement(PyObject* self, PyObject* args, PyObject* kw)
{
PyObject* elem;
+ ElementObject* parent;
PyObject* tag;
PyObject* attrib = NULL;
- if (!PyArg_ParseTuple(args, "O|O!:Element", &tag,
+ if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
+ &Element_Type, &parent, &tag,
&PyDict_Type, &attrib))
return NULL;
@@ -485,63 +559,77 @@ element(PyObject* self, PyObject* args, PyObject* kw)
attrib = Py_None;
}
- elem = element_new(tag, attrib);
+ elem = create_new_element(tag, attrib);
Py_DECREF(attrib);
+ if (element_add_subelement(parent, elem) < 0) {
+ Py_DECREF(elem);
+ return NULL;
+ }
+
return elem;
}
-static PyObject*
-subelement(PyObject* self, PyObject* args, PyObject* kw)
+static int
+element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
{
- PyObject* elem;
+ Py_VISIT(self->tag);
+ Py_VISIT(JOIN_OBJ(self->text));
+ Py_VISIT(JOIN_OBJ(self->tail));
- ElementObject* parent;
- PyObject* tag;
- PyObject* attrib = NULL;
- if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
- &Element_Type, &parent, &tag,
- &PyDict_Type, &attrib))
- return NULL;
+ if (self->extra) {
+ int i;
+ Py_VISIT(self->extra->attrib);
- if (attrib || kw) {
- attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New();
- if (!attrib)
- return NULL;
- if (kw)
- PyDict_Update(attrib, kw);
- } else {
- Py_INCREF(Py_None);
- attrib = Py_None;
+ for (i = 0; i < self->extra->length; ++i)
+ Py_VISIT(self->extra->children[i]);
}
+ return 0;
+}
- elem = element_new(tag, attrib);
+static int
+element_gc_clear(ElementObject *self)
+{
+ Py_CLEAR(self->tag);
- Py_DECREF(attrib);
+ /* The following is like Py_CLEAR for self->text and self->tail, but
+ * written explicitily because the real pointers hide behind access
+ * macros.
+ */
+ if (self->text) {
+ PyObject *tmp = JOIN_OBJ(self->text);
+ self->text = NULL;
+ Py_DECREF(tmp);
+ }
- if (element_add_subelement(parent, elem) < 0) {
- Py_DECREF(elem);
- return NULL;
+ if (self->tail) {
+ PyObject *tmp = JOIN_OBJ(self->tail);
+ self->tail = NULL;
+ Py_DECREF(tmp);
}
- return elem;
+ /* After dropping all references from extra, it's no longer valid anyway,
+ * so fully deallocate it.
+ */
+ dealloc_extra(self);
+ return 0;
}
static void
element_dealloc(ElementObject* self)
{
- if (self->extra)
- element_dealloc_extra(self);
+ PyObject_GC_UnTrack(self);
- /* discard attributes */
- Py_DECREF(self->tag);
- Py_DECREF(JOIN_OBJ(self->text));
- Py_DECREF(JOIN_OBJ(self->tail));
+ if (self->weakreflist != NULL)
+ PyObject_ClearWeakRefs((PyObject *) self);
- RELEASE(sizeof(ElementObject), "destroy element");
+ /* element_gc_clear clears all references and deallocates extra
+ */
+ element_gc_clear(self);
- PyObject_Del(self);
+ RELEASE(sizeof(ElementObject), "destroy element");
+ Py_TYPE(self)->tp_free((PyObject *)self);
}
/* -------------------------------------------------------------------- */
@@ -561,15 +649,12 @@ element_append(ElementObject* self, PyObject* args)
}
static PyObject*
-element_clear(ElementObject* self, PyObject* args)
+element_clearmethod(ElementObject* self, PyObject* args)
{
if (!PyArg_ParseTuple(args, ":clear"))
return NULL;
- if (self->extra) {
- element_dealloc_extra(self);
- self->extra = NULL;
- }
+ dealloc_extra(self);
Py_INCREF(Py_None);
Py_DECREF(JOIN_OBJ(self->text));
@@ -591,7 +676,7 @@ element_copy(ElementObject* self, PyObject* args)
if (!PyArg_ParseTuple(args, ":__copy__"))
return NULL;
- element = (ElementObject*) element_new(
+ element = (ElementObject*) create_new_element(
self->tag, (self->extra) ? self->extra->attrib : Py_None
);
if (!element)
@@ -606,7 +691,7 @@ element_copy(ElementObject* self, PyObject* args)
Py_INCREF(JOIN_OBJ(element->tail));
if (self->extra) {
-
+
if (element_resize(element, self->extra->length) < 0) {
Py_DECREF(element);
return NULL;
@@ -618,7 +703,7 @@ element_copy(ElementObject* self, PyObject* args)
}
element->extra->length = self->extra->length;
-
+
}
return (PyObject*) element;
@@ -654,14 +739,14 @@ element_deepcopy(ElementObject* self, PyObject* args)
attrib = Py_None;
}
- element = (ElementObject*) element_new(tag, attrib);
+ element = (ElementObject*) create_new_element(tag, attrib);
Py_DECREF(tag);
Py_DECREF(attrib);
if (!element)
return NULL;
-
+
text = deepcopy(JOIN_OBJ(self->text), memo);
if (!text)
goto error;
@@ -675,7 +760,7 @@ element_deepcopy(ElementObject* self, PyObject* args)
element->tail = JOIN_SET(tail, JOIN_GET(self->tail));
if (self->extra) {
-
+
if (element_resize(element, self->extra->length) < 0)
goto error;
@@ -689,7 +774,7 @@ element_deepcopy(ElementObject* self, PyObject* args)
}
element->extra->length = self->extra->length;
-
+
}
/* add object to memo dictionary (so deepcopy won't visit it again) */
@@ -723,13 +808,16 @@ checkpath(PyObject* tag)
(ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
if (PyUnicode_Check(tag)) {
- Py_UNICODE *p = PyUnicode_AS_UNICODE(tag);
- for (i = 0; i < PyUnicode_GET_SIZE(tag); i++) {
- if (p[i] == '{')
+ const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
+ void *data = PyUnicode_DATA(tag);
+ unsigned int kind = PyUnicode_KIND(tag);
+ for (i = 0; i < len; i++) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (ch == '{')
check = 0;
- else if (p[i] == '}')
+ else if (ch == '}')
check = 1;
- else if (check && PATHCHAR(p[i]))
+ else if (check && PATHCHAR(ch))
return 1;
}
return 0;
@@ -772,6 +860,15 @@ element_extend(ElementObject* self, PyObject* args)
seqlen = PySequence_Size(seq);
for (i = 0; i < seqlen; i++) {
PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
+ if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) {
+ Py_DECREF(seq);
+ PyErr_Format(
+ PyExc_TypeError,
+ "expected an Element, not \"%.200s\"",
+ Py_TYPE(element)->tp_name);
+ return NULL;
+ }
+
if (element_add_subelement(self, element) < 0) {
Py_DECREF(seq);
return NULL;
@@ -787,20 +884,22 @@ static PyObject*
element_find(ElementObject* self, PyObject* args)
{
int i;
-
PyObject* tag;
PyObject* namespaces = Py_None;
+
if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces))
return NULL;
- if (checkpath(tag) || namespaces != Py_None)
- return PyObject_CallMethod(
- elementpath_obj, "find", "OOO", self, tag, namespaces
+ if (checkpath(tag) || namespaces != Py_None) {
+ _Py_IDENTIFIER(find);
+ return _PyObject_CallMethodId(
+ elementpath_obj, &PyId_find, "OOO", self, tag, namespaces
);
+ }
if (!self->extra)
Py_RETURN_NONE;
-
+
for (i = 0; i < self->extra->length; i++) {
PyObject* item = self->extra->children[i];
if (Element_CheckExact(item) &&
@@ -817,16 +916,17 @@ static PyObject*
element_findtext(ElementObject* self, PyObject* args)
{
int i;
-
PyObject* tag;
PyObject* default_value = Py_None;
PyObject* namespaces = Py_None;
+ _Py_IDENTIFIER(findtext);
+
if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces))
return NULL;
if (checkpath(tag) || namespaces != Py_None)
- return PyObject_CallMethod(
- elementpath_obj, "findtext", "OOOO", self, tag, default_value, namespaces
+ return _PyObject_CallMethodId(
+ elementpath_obj, &PyId_findtext, "OOOO", self, tag, default_value, namespaces
);
if (!self->extra) {
@@ -855,16 +955,18 @@ element_findall(ElementObject* self, PyObject* args)
{
int i;
PyObject* out;
-
PyObject* tag;
PyObject* namespaces = Py_None;
+
if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces))
return NULL;
- if (checkpath(tag) || namespaces != Py_None)
- return PyObject_CallMethod(
- elementpath_obj, "findall", "OOO", self, tag, namespaces
+ if (checkpath(tag) || namespaces != Py_None) {
+ _Py_IDENTIFIER(findall);
+ return _PyObject_CallMethodId(
+ elementpath_obj, &PyId_findall, "OOO", self, tag, namespaces
);
+ }
out = PyList_New(0);
if (!out)
@@ -892,11 +994,13 @@ element_iterfind(ElementObject* self, PyObject* args)
{
PyObject* tag;
PyObject* namespaces = Py_None;
+ _Py_IDENTIFIER(iterfind);
+
if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces))
return NULL;
- return PyObject_CallMethod(
- elementpath_obj, "iterfind", "OOO", self, tag, namespaces
+ return _PyObject_CallMethodId(
+ elementpath_obj, &PyId_iterfind, "OOO", self, tag, namespaces
);
}
@@ -953,7 +1057,7 @@ static PyObject*
element_iter(ElementObject* self, PyObject* args)
{
PyObject* result;
-
+
PyObject* tag = Py_None;
if (!PyArg_ParseTuple(args, "|O:iter", &tag))
return NULL;
@@ -985,7 +1089,7 @@ static PyObject*
element_itertext(ElementObject* self, PyObject* args)
{
PyObject* result;
-
+
if (!PyArg_ParseTuple(args, ":itertext"))
return NULL;
@@ -1039,7 +1143,7 @@ element_insert(ElementObject* self, PyObject* args)
return NULL;
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
if (index < 0) {
index += self->extra->length;
@@ -1110,7 +1214,7 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
if (!attrib)
return NULL;
- elem = element_new(tag, attrib);
+ elem = create_new_element(tag, attrib);
Py_DECREF(attrib);
@@ -1118,31 +1222,6 @@ element_makeelement(PyObject* self, PyObject* args, PyObject* kw)
}
static PyObject*
-element_reduce(ElementObject* self, PyObject* args)
-{
- if (!PyArg_ParseTuple(args, ":__reduce__"))
- return NULL;
-
- /* Hack alert: This method is used to work around a __copy__
- problem on certain 2.3 and 2.4 versions. To save time and
- simplify the code, we create the copy in here, and use a dummy
- copyelement helper to trick the copy module into doing the
- right thing. */
-
- if (!elementtree_copyelement_obj) {
- PyErr_SetString(
- PyExc_RuntimeError,
- "copyelement helper not found"
- );
- return NULL;
- }
-
- return Py_BuildValue(
- "O(N)", elementtree_copyelement_obj, element_copy(self, args)
- );
-}
-
-static PyObject*
element_remove(ElementObject* self, PyObject* args)
{
int i;
@@ -1189,7 +1268,10 @@ element_remove(ElementObject* self, PyObject* args)
static PyObject*
element_repr(ElementObject* self)
{
- return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
+ if (self->tag)
+ return PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
+ else
+ return PyUnicode_FromFormat("<Element at %p>", self);
}
static PyObject*
@@ -1203,7 +1285,7 @@ element_set(ElementObject* self, PyObject* args)
return NULL;
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
attrib = element_get_attrib(self);
if (!attrib)
@@ -1250,13 +1332,8 @@ element_subscr(PyObject* self_, PyObject* item)
{
ElementObject* self = (ElementObject*) self_;
-#if (PY_VERSION_HEX < 0x02050000)
- if (PyInt_Check(item) || PyLong_Check(item)) {
- long i = PyInt_AsLong(item);
-#else
if (PyIndex_Check(item)) {
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
-#endif
if (i == -1 && PyErr_Occurred()) {
return NULL;
@@ -1307,13 +1384,8 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
{
ElementObject* self = (ElementObject*) self_;
-#if (PY_VERSION_HEX < 0x02050000)
- if (PyInt_Check(item) || PyLong_Check(item)) {
- long i = PyInt_AsLong(item);
-#else
if (PyIndex_Check(item)) {
Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
-#endif
if (i == -1 && PyErr_Occurred()) {
return -1;
@@ -1329,7 +1401,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
PyObject* seq = NULL;
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
if (PySlice_GetIndicesEx(item,
self->extra->length,
@@ -1337,9 +1409,74 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
return -1;
}
- if (value == NULL)
- newlen = 0;
+ if (value == NULL) {
+ /* Delete slice */
+ size_t cur;
+ Py_ssize_t i;
+
+ if (slicelen <= 0)
+ return 0;
+
+ /* Since we're deleting, the direction of the range doesn't matter,
+ * so for simplicity make it always ascending.
+ */
+ if (step < 0) {
+ stop = start + 1;
+ start = stop + step * (slicelen - 1) - 1;
+ step = -step;
+ }
+
+ assert((size_t)slicelen <= PY_SIZE_MAX / sizeof(PyObject *));
+
+ /* recycle is a list that will contain all the children
+ * scheduled for removal.
+ */
+ if (!(recycle = PyList_New(slicelen))) {
+ PyErr_NoMemory();
+ return -1;
+ }
+
+ /* This loop walks over all the children that have to be deleted,
+ * with cur pointing at them. num_moved is the amount of children
+ * until the next deleted child that have to be "shifted down" to
+ * occupy the deleted's places.
+ * Note that in the ith iteration, shifting is done i+i places down
+ * because i children were already removed.
+ */
+ for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
+ /* Compute how many children have to be moved, clipping at the
+ * list end.
+ */
+ Py_ssize_t num_moved = step - 1;
+ if (cur + step >= (size_t)self->extra->length) {
+ num_moved = self->extra->length - cur - 1;
+ }
+
+ PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
+
+ memmove(
+ self->extra->children + cur - i,
+ self->extra->children + cur + 1,
+ num_moved * sizeof(PyObject *));
+ }
+
+ /* Leftover "tail" after the last removed child */
+ cur = start + (size_t)slicelen * step;
+ if (cur < (size_t)self->extra->length) {
+ memmove(
+ self->extra->children + cur - slicelen,
+ self->extra->children + cur,
+ (self->extra->length - cur) * sizeof(PyObject *));
+ }
+
+ self->extra->length -= slicelen;
+
+ /* Discard the recycle list with all the deleted sub-elements */
+ Py_XDECREF(recycle);
+ return 0;
+ }
else {
+ /* A new slice is actually being assigned */
seq = PySequence_Fast(value, "");
if (!seq) {
PyErr_Format(
@@ -1354,19 +1491,13 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
if (step != 1 && newlen != slicelen)
{
PyErr_Format(PyExc_ValueError,
-#if (PY_VERSION_HEX < 0x02050000)
- "attempt to assign sequence of size %d "
- "to extended slice of size %d",
-#else
"attempt to assign sequence of size %zd "
"to extended slice of size %zd",
-#endif
newlen, slicelen
);
return -1;
}
-
/* Resize before creating the recycle bin, to prevent refleaks. */
if (newlen > slicelen) {
if (element_resize(self, newlen - slicelen) < 0) {
@@ -1431,7 +1562,7 @@ element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
static PyMethodDef element_methods[] = {
- {"clear", (PyCFunction) element_clear, METH_VARARGS},
+ {"clear", (PyCFunction) element_clearmethod, METH_VARARGS},
{"get", (PyCFunction) element_get, METH_VARARGS},
{"set", (PyCFunction) element_set, METH_VARARGS},
@@ -1460,18 +1591,6 @@ static PyMethodDef element_methods[] = {
{"__copy__", (PyCFunction) element_copy, METH_VARARGS},
{"__deepcopy__", (PyCFunction) element_deepcopy, METH_VARARGS},
- /* Some 2.3 and 2.4 versions do not handle the __copy__ method on
- C objects correctly, so we have to fake it using a __reduce__-
- based hack (see the element_reduce implementation above for
- details). */
-
- /* The behaviour has been changed in 2.3.5 and 2.4.1, so we're
- using a runtime test to figure out if we need to fake things
- or now (see the init code below). The following entry is
- enabled only if the hack is needed. */
-
- {"!__reduce__", (PyCFunction) element_reduce, METH_VARARGS},
-
{NULL, NULL}
};
@@ -1483,7 +1602,7 @@ element_getattro(ElementObject* self, PyObject* nameobj)
if (PyUnicode_Check(nameobj))
name = _PyUnicode_AsString(nameobj);
-
+
if (name == NULL)
return NULL;
@@ -1510,7 +1629,7 @@ element_getattro(ElementObject* self, PyObject* nameobj)
} else if (strcmp(name, "attrib") == 0) {
PyErr_Clear();
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
res = element_get_attrib(self);
}
@@ -1546,7 +1665,7 @@ element_setattr(ElementObject* self, const char* name, PyObject* value)
Py_INCREF(self->tail);
} else if (strcmp(name, "attrib") == 0) {
if (!self->extra)
- element_new_extra(self, NULL);
+ create_extra(self, NULL);
Py_DECREF(self->extra->attrib);
self->extra->attrib = value;
Py_INCREF(self->extra->attrib);
@@ -1578,31 +1697,42 @@ static PyTypeObject Element_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
"Element", sizeof(ElementObject), 0,
/* methods */
- (destructor)element_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- (setattrfunc)element_setattr, /* tp_setattr */
- 0, /* tp_reserved */
- (reprfunc)element_repr, /* tp_repr */
- 0, /* tp_as_number */
- &element_as_sequence, /* tp_as_sequence */
- &element_as_mapping, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- (getattrofunc)element_getattro, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- element_methods, /* tp_methods */
- 0, /* tp_members */
+ (destructor)element_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ (setattrfunc)element_setattr, /* tp_setattr */
+ 0, /* tp_reserved */
+ (reprfunc)element_repr, /* tp_repr */
+ 0, /* tp_as_number */
+ &element_as_sequence, /* tp_as_sequence */
+ &element_as_mapping, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ (getattrofunc)element_getattro, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+ /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)element_gc_traverse, /* tp_traverse */
+ (inquiry)element_gc_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ element_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)element_init, /* tp_init */
+ PyType_GenericAlloc, /* tp_alloc */
+ element_new, /* tp_new */
+ 0, /* tp_free */
};
/* ==================================================================== */
@@ -1700,13 +1830,6 @@ treebuilder_dealloc(TreeBuilderObject* self)
/* handlers */
LOCAL(PyObject*)
-treebuilder_handle_xml(TreeBuilderObject* self, PyObject* encoding,
- PyObject* standalone)
-{
- Py_RETURN_NONE;
-}
-
-LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
PyObject* attrib)
{
@@ -1728,7 +1851,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
self->data = NULL;
}
- node = element_new(tag, attrib);
+ node = create_new_element(tag, attrib);
if (!node)
return NULL;
@@ -1977,22 +2100,10 @@ treebuilder_start(TreeBuilderObject* self, PyObject* args)
return treebuilder_handle_start(self, tag, attrib);
}
-static PyObject*
-treebuilder_xml(TreeBuilderObject* self, PyObject* args)
-{
- PyObject* encoding;
- PyObject* standalone;
- if (!PyArg_ParseTuple(args, "OO:xml", &encoding, &standalone))
- return NULL;
-
- return treebuilder_handle_xml(self, encoding, standalone);
-}
-
static PyMethodDef treebuilder_methods[] = {
{"data", (PyCFunction) treebuilder_data, METH_VARARGS},
{"start", (PyCFunction) treebuilder_start, METH_VARARGS},
{"end", (PyCFunction) treebuilder_end, METH_VARARGS},
- {"xml", (PyCFunction) treebuilder_xml, METH_VARARGS},
{"close", (PyCFunction) treebuilder_close, METH_VARARGS},
{NULL, NULL}
};
@@ -2053,8 +2164,6 @@ typedef struct {
PyObject* names;
- PyObject* handle_xml;
-
PyObject* handle_start;
PyObject* handle_data;
PyObject* handle_end;
@@ -2113,7 +2222,7 @@ makeuniversal(XMLParserObject* self, const char* string)
Py_INCREF(key);
tag = key;
}
-
+
/* decode universal name */
p = PyBytes_AS_STRING(tag);
value = PyUnicode_DecodeUTF8(p, size, "strict");
@@ -2135,20 +2244,39 @@ makeuniversal(XMLParserObject* self, const char* string)
return value;
}
+/* Set the ParseError exception with the given parameters.
+ * If message is not NULL, it's used as the error string. Otherwise, the
+ * message string is the default for the given error_code.
+*/
static void
-expat_set_error(const char* message, int line, int column)
+expat_set_error(enum XML_Error error_code, int line, int column, char *message)
{
- PyObject *error;
- PyObject *position;
- char buffer[256];
+ PyObject *errmsg, *error, *position, *code;
- sprintf(buffer, "%.100s: line %d, column %d", message, line, column);
+ errmsg = PyUnicode_FromFormat("%s: line %d, column %d",
+ message ? message : EXPAT(ErrorString)(error_code),
+ line, column);
+ if (errmsg == NULL)
+ return;
- error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer);
+ error = PyObject_CallFunction(elementtree_parseerror_obj, "O", errmsg);
+ Py_DECREF(errmsg);
if (!error)
return;
- /* add position attribute */
+ /* Add code and position attributes */
+ code = PyLong_FromLong((long)error_code);
+ if (!code) {
+ Py_DECREF(error);
+ return;
+ }
+ if (PyObject_SetAttrString(error, "code", code) == -1) {
+ Py_DECREF(error);
+ Py_DECREF(code);
+ return;
+ }
+ Py_DECREF(code);
+
position = Py_BuildValue("(ii)", line, column);
if (!position) {
Py_DECREF(error);
@@ -2200,9 +2328,10 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
char message[128] = "undefined entity ";
strncat(message, data_in, data_len < 100?data_len:100);
expat_set_error(
- message,
+ XML_ERROR_UNDEFINED_ENTITY,
EXPAT(GetErrorLineNumber)(self->parser),
- EXPAT(GetErrorColumnNumber)(self->parser)
+ EXPAT(GetErrorColumnNumber)(self->parser),
+ message
);
}
@@ -2399,29 +2528,33 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
XML_Encoding *info)
{
PyObject* u;
- Py_UNICODE* p;
unsigned char s[256];
int i;
+ void *data;
+ unsigned int kind;
memset(info, 0, sizeof(XML_Encoding));
for (i = 0; i < 256; i++)
s[i] = i;
-
+
u = PyUnicode_Decode((char*) s, 256, name, "replace");
if (!u)
return XML_STATUS_ERROR;
+ if (PyUnicode_READY(u))
+ return XML_STATUS_ERROR;
- if (PyUnicode_GET_SIZE(u) != 256) {
+ if (PyUnicode_GET_LENGTH(u) != 256) {
Py_DECREF(u);
return XML_STATUS_ERROR;
}
- p = PyUnicode_AS_UNICODE(u);
-
+ kind = PyUnicode_KIND(u);
+ data = PyUnicode_DATA(u);
for (i = 0; i < 256; i++) {
- if (p[i] != Py_UNICODE_REPLACEMENT_CHARACTER)
- info->map[i] = p[i];
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+ if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
+ info->map[i] = ch;
else
info->map[i] = -1;
}
@@ -2466,7 +2599,7 @@ xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
PyObject_Del(self);
return NULL;
}
-
+
self->names = PyDict_New();
if (!self->names) {
PyObject_Del(self->entity);
@@ -2501,7 +2634,6 @@ xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
Py_INCREF(target);
self->target = target;
- self->handle_xml = PyObject_GetAttrString(target, "xml");
self->handle_start = PyObject_GetAttrString(target, "start");
self->handle_data = PyObject_GetAttrString(target, "data");
self->handle_end = PyObject_GetAttrString(target, "end");
@@ -2557,7 +2689,6 @@ xmlparser_dealloc(XMLParserObject* self)
Py_XDECREF(self->handle_end);
Py_XDECREF(self->handle_data);
Py_XDECREF(self->handle_start);
- Py_XDECREF(self->handle_xml);
Py_DECREF(self->target);
Py_DECREF(self->entity);
@@ -2583,9 +2714,10 @@ expat_parse(XMLParserObject* self, char* data, int data_len, int final)
if (!ok) {
expat_set_error(
- EXPAT(ErrorString)(EXPAT(GetErrorCode)(self->parser)),
+ EXPAT(GetErrorCode)(self->parser),
EXPAT(GetErrorLineNumber)(self->parser),
- EXPAT(GetErrorColumnNumber)(self->parser)
+ EXPAT(GetErrorColumnNumber)(self->parser),
+ NULL
);
return NULL;
}
@@ -2636,6 +2768,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
PyObject* reader;
PyObject* buffer;
+ PyObject* temp;
PyObject* res;
PyObject* fileobj;
@@ -2645,7 +2778,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
reader = PyObject_GetAttrString(fileobj, "read");
if (!reader)
return NULL;
-
+
/* read from open file object */
for (;;) {
@@ -2657,7 +2790,27 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
return NULL;
}
- if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
+ if (PyUnicode_CheckExact(buffer)) {
+ /* A unicode object is encoded into bytes using UTF-8 */
+ if (PyUnicode_GET_SIZE(buffer) == 0) {
+ Py_DECREF(buffer);
+ break;
+ }
+ temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
+ if (!temp) {
+ /* Propagate exception from PyUnicode_AsEncodedString */
+ Py_DECREF(buffer);
+ Py_DECREF(reader);
+ return NULL;
+ }
+
+ /* Here we no longer need the original buffer since it contains
+ * unicode. Make it point to the encoded bytes object.
+ */
+ Py_DECREF(buffer);
+ buffer = temp;
+ }
+ else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Py_DECREF(buffer);
break;
}
@@ -2705,7 +2858,7 @@ xmlparser_setevents(XMLParserObject* self, PyObject* args)
if (!TreeBuilder_CheckExact(self->target)) {
PyErr_SetString(
PyExc_TypeError,
- "event handling only supported for cElementTree.Treebuilder "
+ "event handling only supported for ElementTree.TreeBuilder "
"targets"
);
return NULL;
@@ -2796,7 +2949,7 @@ static PyMethodDef xmlparser_methods[] = {
{NULL, NULL}
};
-static PyObject*
+static PyObject*
xmlparser_getattro(XMLParserObject* self, PyObject* nameobj)
{
if (PyUnicode_Check(nameobj)) {
@@ -2857,12 +3010,10 @@ static PyTypeObject XMLParser_Type = {
/* python module interface */
static PyMethodDef _functions[] = {
- {"Element", (PyCFunction) element, METH_VARARGS|METH_KEYWORDS},
{"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
{"TreeBuilder", (PyCFunction) treebuilder, METH_VARARGS},
#if defined(USE_EXPAT)
{"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
- {"XMLTreeBuilder", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
#endif
{NULL, NULL}
};
@@ -2883,8 +3034,7 @@ static struct PyModuleDef _elementtreemodule = {
PyMODINIT_FUNC
PyInit__elementtree(void)
{
- PyObject* m;
- PyObject* g;
+ PyObject *m, *g, *temp;
char* bootstrap;
/* Initialize object types */
@@ -2916,56 +3066,6 @@ PyInit__elementtree(void)
PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
bootstrap = (
-
- "from copy import copy, deepcopy\n"
-
- "try:\n"
- " from xml.etree import ElementTree\n"
- "except ImportError:\n"
- " import ElementTree\n"
- "ET = ElementTree\n"
- "del ElementTree\n"
-
- "import _elementtree as cElementTree\n"
-
- "try:\n" /* check if copy works as is */
- " copy(cElementTree.Element('x'))\n"
- "except:\n"
- " def copyelement(elem):\n"
- " return elem\n"
-
- "class CommentProxy:\n"
- " def __call__(self, text=None):\n"
- " element = cElementTree.Element(ET.Comment)\n"
- " element.text = text\n"
- " return element\n"
- " def __eq__(self, other):\n"
- " return ET.Comment == other\n"
- "cElementTree.Comment = CommentProxy()\n"
-
- "class ElementTree(ET.ElementTree):\n" /* public */
- " def parse(self, source, parser=None):\n"
- " close_source = False\n"
- " if not hasattr(source, 'read'):\n"
- " source = open(source, 'rb')\n"
- " close_source = True\n"
- " try:\n"
- " if parser is not None:\n"
- " while 1:\n"
- " data = source.read(65536)\n"
- " if not data:\n"
- " break\n"
- " parser.feed(data)\n"
- " self._root = parser.close()\n"
- " else:\n"
- " parser = cElementTree.XMLParser()\n"
- " self._root = parser._parse(source)\n"
- " return self._root\n"
- " finally:\n"
- " if close_source:\n"
- " source.close()\n"
- "cElementTree.ElementTree = ElementTree\n"
-
"def iter(node, tag=None):\n" /* helper */
" if tag == '*':\n"
" tag = None\n"
@@ -2984,124 +3084,19 @@ PyInit__elementtree(void)
" if e.tail:\n"
" yield e.tail\n"
- "def parse(source, parser=None):\n" /* public */
- " tree = ElementTree()\n"
- " tree.parse(source, parser)\n"
- " return tree\n"
- "cElementTree.parse = parse\n"
-
- "class iterparse:\n"
- " root = None\n"
- " def __init__(self, file, events=None):\n"
- " self._close_file = False\n"
- " if not hasattr(file, 'read'):\n"
- " file = open(file, 'rb')\n"
- " self._close_file = True\n"
- " self._file = file\n"
- " self._events = []\n"
- " self._index = 0\n"
- " self._error = None\n"
- " self.root = self._root = None\n"
- " b = cElementTree.TreeBuilder()\n"
- " self._parser = cElementTree.XMLParser(b)\n"
- " self._parser._setevents(self._events, events)\n"
- " def __next__(self):\n"
- " while 1:\n"
- " try:\n"
- " item = self._events[self._index]\n"
- " self._index += 1\n"
- " return item\n"
- " except IndexError:\n"
- " pass\n"
- " if self._error:\n"
- " e = self._error\n"
- " self._error = None\n"
- " raise e\n"
- " if self._parser is None:\n"
- " self.root = self._root\n"
- " if self._close_file:\n"
- " self._file.close()\n"
- " raise StopIteration\n"
- " # load event buffer\n"
- " del self._events[:]\n"
- " self._index = 0\n"
- " data = self._file.read(16384)\n"
- " if data:\n"
- " try:\n"
- " self._parser.feed(data)\n"
- " except SyntaxError as exc:\n"
- " self._error = exc\n"
- " else:\n"
- " self._root = self._parser.close()\n"
- " self._parser = None\n"
- " def __iter__(self):\n"
- " return self\n"
- "cElementTree.iterparse = iterparse\n"
-
- "class PIProxy:\n"
- " def __call__(self, target, text=None):\n"
- " element = cElementTree.Element(ET.PI)\n"
- " element.text = target\n"
- " if text:\n"
- " element.text = element.text + ' ' + text\n"
- " return element\n"
- " def __eq__(self, other):\n"
- " return ET.PI == other\n"
- "cElementTree.PI = cElementTree.ProcessingInstruction = PIProxy()\n"
-
- "def XML(text):\n" /* public */
- " parser = cElementTree.XMLParser()\n"
- " parser.feed(text)\n"
- " return parser.close()\n"
- "cElementTree.XML = cElementTree.fromstring = XML\n"
-
- "def XMLID(text):\n" /* public */
- " tree = XML(text)\n"
- " ids = {}\n"
- " for elem in tree.iter():\n"
- " id = elem.get('id')\n"
- " if id:\n"
- " ids[id] = elem\n"
- " return tree, ids\n"
- "cElementTree.XMLID = XMLID\n"
-
- "try:\n"
- " register_namespace = ET.register_namespace\n"
- "except AttributeError:\n"
- " def register_namespace(prefix, uri):\n"
- " ET._namespace_map[uri] = prefix\n"
- "cElementTree.register_namespace = register_namespace\n"
-
- "cElementTree.dump = ET.dump\n"
- "cElementTree.ElementPath = ElementPath = ET.ElementPath\n"
- "cElementTree.iselement = ET.iselement\n"
- "cElementTree.QName = ET.QName\n"
- "cElementTree.tostring = ET.tostring\n"
- "cElementTree.fromstringlist = ET.fromstringlist\n"
- "cElementTree.tostringlist = ET.tostringlist\n"
- "cElementTree.VERSION = '" VERSION "'\n"
- "cElementTree.__version__ = '" VERSION "'\n"
-
);
if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
return NULL;
- elementpath_obj = PyDict_GetItemString(g, "ElementPath");
+ if (!(temp = PyImport_ImportModule("copy")))
+ return NULL;
+ elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
+ Py_XDECREF(temp);
- elementtree_copyelement_obj = PyDict_GetItemString(g, "copyelement");
- if (elementtree_copyelement_obj) {
- /* reduce hack needed; enable reduce method */
- PyMethodDef* mp;
- for (mp = element_methods; mp->ml_name; mp++)
- if (mp->ml_meth == (PyCFunction) element_reduce) {
- mp->ml_name = "__reduce__";
- break;
- }
- } else
- PyErr_Clear();
+ if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
+ return NULL;
- elementtree_deepcopy_obj = PyDict_GetItemString(g, "deepcopy");
elementtree_iter_obj = PyDict_GetItemString(g, "iter");
elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
@@ -3120,10 +3115,13 @@ PyInit__elementtree(void)
#endif
elementtree_parseerror_obj = PyErr_NewException(
- "cElementTree.ParseError", PyExc_SyntaxError, NULL
+ "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
);
Py_INCREF(elementtree_parseerror_obj);
PyModule_AddObject(m, "ParseError", elementtree_parseerror_obj);
+ Py_INCREF((PyObject *)&Element_Type);
+ PyModule_AddObject(m, "Element", (PyObject *)&Element_Type);
+
return m;
}