summaryrefslogtreecommitdiff
path: root/src/lxml/objectpath.pxi
diff options
context:
space:
mode:
authorscoder <none@none>2007-09-11 12:03:54 +0200
committerscoder <none@none>2007-09-11 12:03:54 +0200
commitdebae622334df649add61cb213d3588aa6306782 (patch)
tree808f8d239538d89b14ebe98273b84e141490deec /src/lxml/objectpath.pxi
parent2f21e264c3f0b67106f00d93d073a86ddd9eb0a5 (diff)
downloadpython-lxml-debae622334df649add61cb213d3588aa6306782.tar.gz
[svn r2834] moved objectpath implementation to separate .pxi
--HG-- branch : trunk rename : src/lxml/objectify.pyx => src/lxml/objectpath.pxi
Diffstat (limited to 'src/lxml/objectpath.pxi')
-rw-r--r--src/lxml/objectpath.pxi335
1 files changed, 335 insertions, 0 deletions
diff --git a/src/lxml/objectpath.pxi b/src/lxml/objectpath.pxi
new file mode 100644
index 00000000..e0c39cc0
--- /dev/null
+++ b/src/lxml/objectpath.pxi
@@ -0,0 +1,335 @@
+################################################################################
+# ObjectPath
+
+ctypedef struct _ObjectPath:
+ char* href
+ char* name
+ Py_ssize_t index
+
+
+cdef class ObjectPath:
+ """Immutable object that represents a compiled object path.
+
+ Example for a path: 'root.child[1].{other}child[25]'
+ """
+ cdef readonly object find
+ cdef object _path
+ cdef object _path_str
+ cdef _ObjectPath* _c_path
+ cdef Py_ssize_t _path_len
+ def __init__(self, path):
+ if python._isString(path):
+ self._path = _parseObjectPathString(path)
+ self._path_str = path
+ else:
+ self._path = _parseObjectPathList(path)
+ self._path_str = '.'.join(path)
+ self._path_len = python.PyList_GET_SIZE(self._path)
+ self._c_path = _buildObjectPathSegments(self._path)
+ self.find = self.__call__
+
+ def __dealloc__(self):
+ if self._c_path is not NULL:
+ python.PyMem_Free(self._c_path)
+
+ def __str__(self):
+ return self._path_str
+
+ def __call__(self, _Element root not None, *default):
+ """Follow the attribute path in the object structure and return the
+ target attribute value.
+
+ If it it not found, either returns a default value (if one was passed
+ as second argument) or raises AttributeError.
+ """
+ cdef Py_ssize_t use_default
+ use_default = python.PyTuple_GET_SIZE(default)
+ if use_default == 1:
+ default = python.PyTuple_GET_ITEM(default, 0)
+ python.Py_INCREF(default)
+ use_default = 1
+ elif use_default > 1:
+ raise TypeError, "invalid number of arguments: needs one or two"
+ return _findObjectPath(root, self._c_path, self._path_len,
+ default, use_default)
+
+ def hasattr(self, _Element root not None):
+ try:
+ _findObjectPath(root, self._c_path, self._path_len, None, 0)
+ except AttributeError:
+ return False
+ return True
+
+ def setattr(self, _Element root not None, value):
+ """Set the value of the target element in a subtree.
+
+ If any of the children on the path does not exist, it is created.
+ """
+ _createObjectPath(root, self._c_path, self._path_len, 1, value)
+
+ def addattr(self, _Element root not None, value):
+ """Append a value to the target element in a subtree.
+
+ If any of the children on the path does not exist, it is created.
+ """
+ _createObjectPath(root, self._c_path, self._path_len, 0, value)
+
+cdef object __MATCH_PATH_SEGMENT
+__MATCH_PATH_SEGMENT = re.compile(
+ r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
+ re.U).match
+
+cdef object _RELATIVE_PATH_SEGMENT
+_RELATIVE_PATH_SEGMENT = (None, None, 0)
+
+cdef _parseObjectPathString(path):
+ """Parse object path string into a 'hrefOnameOhrefOnameOOO' string and an
+ index list. The index list is None if no index was used in the path.
+ """
+ cdef int has_dot
+ new_path = []
+ path = cetree.utf8(path.strip())
+ if path == '.':
+ return [_RELATIVE_PATH_SEGMENT]
+ path_pos = 0
+ while python.PyString_GET_SIZE(path) > 0:
+ match = __MATCH_PATH_SEGMENT(path, path_pos)
+ if match is None:
+ break
+
+ dot, ns, name, index = match.groups()
+ if index is None or python.PyString_GET_SIZE(index) == 0:
+ index = 0
+ else:
+ index = python.PyNumber_Int(index)
+ has_dot = _cstr(dot)[0] == c'.'
+ if python.PyList_GET_SIZE(new_path) == 0:
+ if has_dot:
+ # path '.child' => ignore root
+ python.PyList_Append(new_path, _RELATIVE_PATH_SEGMENT)
+ elif index != 0:
+ raise ValueError, "index not allowed on root node"
+ elif not has_dot:
+ raise ValueError, "invalid path"
+ python.PyList_Append(new_path, (ns, name, index))
+
+ path_pos = match.end()
+ if python.PyList_GET_SIZE(new_path) == 0 or \
+ python.PyString_GET_SIZE(path) > path_pos:
+ raise ValueError, "invalid path"
+ return new_path
+
+cdef _parseObjectPathList(path):
+ """Parse object path sequence into a 'hrefOnameOhrefOnameOOO' string and
+ an index list. The index list is None if no index was used in the path.
+ """
+ cdef char* index_pos
+ cdef char* index_end
+ cdef char* c_name
+ new_path = []
+ for item in path:
+ item = item.strip()
+ if python.PyList_GET_SIZE(new_path) == 0 and item == '':
+ # path '.child' => ignore root
+ ns = name = None
+ index = 0
+ else:
+ ns, name = cetree.getNsTag(item)
+ c_name = _cstr(name)
+ index_pos = cstd.strchr(c_name, c'[')
+ if index_pos is NULL:
+ index = 0
+ else:
+ name = python.PyString_FromStringAndSize(
+ c_name, <Py_ssize_t>(index_pos - c_name))
+ index_pos = index_pos + 1
+ index_end = cstd.strchr(index_pos, c']')
+ if index_end is NULL:
+ raise ValueError, "index must be enclosed in []"
+ index = python.PyNumber_Int(
+ python.PyString_FromStringAndSize(
+ index_pos, <Py_ssize_t>(index_end - index_pos)))
+ if python.PyList_GET_SIZE(new_path) == 0 and index != 0:
+ raise ValueError, "index not allowed on root node"
+ python.PyList_Append(new_path, (ns, name, index))
+ if python.PyList_GET_SIZE(new_path) == 0:
+ raise ValueError, "invalid path"
+ return new_path
+
+cdef _ObjectPath* _buildObjectPathSegments(path_list) except NULL:
+ cdef _ObjectPath* c_path
+ cdef _ObjectPath* c_path_segments
+ cdef Py_ssize_t c_len
+ c_len = python.PyList_GET_SIZE(path_list)
+ c_path_segments = <_ObjectPath*>python.PyMem_Malloc(sizeof(_ObjectPath) *
+ c_len)
+ if c_path_segments is NULL:
+ python.PyErr_NoMemory()
+ return NULL
+ c_path = c_path_segments
+ for href, name, index in path_list:
+ if href is None:
+ c_path[0].href = NULL
+ else:
+ c_path[0].href = _cstr(href)
+ if name is None:
+ c_path[0].name = NULL
+ else:
+ c_path[0].name = _cstr(name)
+ c_path[0].index = index
+ c_path = c_path + 1
+ return c_path_segments
+
+cdef _findObjectPath(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len,
+ default_value, int use_default):
+ """Follow the path to find the target element.
+ """
+ cdef tree.xmlNode* c_node
+ cdef char* c_href
+ cdef char* c_name
+ cdef Py_ssize_t c_index
+ c_node = root._c_node
+ c_name = c_path[0].name
+ c_href = c_path[0].href
+ if c_href is NULL or c_href[0] == c'\0':
+ c_href = tree._getNs(c_node)
+ if not cetree.tagMatches(c_node, c_href, c_name):
+ raise ValueError, "root element does not match: need %s, got %s" % \
+ (cetree.namespacedNameFromNsName(c_href, c_name), root.tag)
+
+ while c_node is not NULL:
+ c_path_len = c_path_len - 1
+ if c_path_len <= 0:
+ return cetree.elementFactory(root._doc, c_node)
+
+ c_path = c_path + 1
+ if c_path[0].href is not NULL:
+ c_href = c_path[0].href # otherwise: keep parent namespace
+ c_name = c_path[0].name
+ c_index = c_path[0].index
+
+ if c_index < 0:
+ c_node = c_node.last
+ else:
+ c_node = c_node.children
+ c_node = _findFollowingSibling(c_node, c_href, c_name, c_index)
+
+ if use_default:
+ return default_value
+ else:
+ tag = cetree.namespacedNameFromNsName(c_href, c_name)
+ raise AttributeError, "no such child: " + tag
+
+cdef _createObjectPath(_Element root, _ObjectPath* c_path,
+ Py_ssize_t c_path_len, int replace, value):
+ """Follow the path to find the target element, build the missing children
+ as needed and set the target element to 'value'. If replace is true, an
+ existing value is replaced, otherwise the new value is added.
+ """
+ cdef _Element child
+ cdef tree.xmlNode* c_node
+ cdef tree.xmlNode* c_child
+ cdef char* c_href
+ cdef char* c_name
+ cdef Py_ssize_t c_index
+ if c_path_len == 1:
+ raise TypeError, "cannot update root node"
+
+ c_node = root._c_node
+ c_name = c_path[0].name
+ c_href = c_path[0].href
+ if c_href is NULL or c_href[0] == c'\0':
+ c_href = tree._getNs(c_node)
+ if not cetree.tagMatches(c_node, c_href, c_name):
+ raise ValueError, "root element does not match: need %s, got %s" % \
+ (cetree.namespacedNameFromNsName(c_href, c_name), root.tag)
+
+ while c_path_len > 1:
+ c_path_len = c_path_len - 1
+ c_path = c_path + 1
+ if c_path[0].href is not NULL:
+ c_href = c_path[0].href # otherwise: keep parent namespace
+ c_name = c_path[0].name
+ c_index = c_path[0].index
+
+ if c_index < 0:
+ c_child = c_node.last
+ else:
+ c_child = c_node.children
+ c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)
+
+ if c_child is not NULL:
+ c_node = c_child
+ elif c_index != 0:
+ raise TypeError, \
+ "creating indexed path attributes is not supported"
+ elif c_path_len == 1:
+ _appendValue(cetree.elementFactory(root._doc, c_node),
+ cetree.namespacedNameFromNsName(c_href, c_name),
+ value)
+ return
+ else:
+ child = cetree.makeSubElement(
+ cetree.elementFactory(root._doc, c_node),
+ cetree.namespacedNameFromNsName(c_href, c_name),
+ None, None, None, None)
+ c_node = child._c_node
+
+ # if we get here, the entire path was already there
+ if replace:
+ element = cetree.elementFactory(root._doc, c_node)
+ _replaceElement(element, value)
+ else:
+ _appendValue(cetree.elementFactory(root._doc, c_node.parent),
+ cetree.namespacedName(c_node), value)
+
+cdef _buildDescendantPaths(tree.xmlNode* c_node, prefix_string):
+ """Returns a list of all descendant paths.
+ """
+ tag = cetree.namespacedName(c_node)
+ if prefix_string:
+ if prefix_string[-1] != '.':
+ prefix_string = prefix_string + '.'
+ prefix_string = prefix_string + tag
+ else:
+ prefix_string = tag
+ path = [prefix_string]
+ path_list = []
+ _recursiveBuildDescendantPaths(c_node, path, path_list)
+ return path_list
+
+cdef _recursiveBuildDescendantPaths(tree.xmlNode* c_node, path, path_list):
+ """Fills the list 'path_list' with all descendant paths, initial prefix
+ being in the list 'path'.
+ """
+ cdef python.PyObject* dict_result
+ cdef tree.xmlNode* c_child
+ cdef char* c_href
+ python.PyList_Append(path_list, '.'.join(path))
+ tags = {}
+ c_href = tree._getNs(c_node)
+ c_child = c_node.children
+ while c_child is not NULL:
+ while c_child.type != tree.XML_ELEMENT_NODE:
+ c_child = c_child.next
+ if c_child is NULL:
+ return
+ if c_href is tree._getNs(c_child):
+ tag = c_child.name
+ elif c_href is not NULL and tree._getNs(c_child) is NULL:
+ # special case: parent has namespace, child does not
+ tag = '{}' + c_child.name
+ else:
+ tag = cetree.namespacedName(c_child)
+ dict_result = python.PyDict_GetItem(tags, tag)
+ if dict_result is NULL:
+ count = 0
+ else:
+ count = (<object>dict_result) + 1
+ python.PyDict_SetItem(tags, tag, count)
+ if count > 0:
+ tag = tag + '[%d]' % count
+ python.PyList_Append(path, tag)
+ _recursiveBuildDescendantPaths(c_child, path, path_list)
+ del path[-1]
+ c_child = c_child.next