summaryrefslogtreecommitdiff
path: root/bzrlib/_simple_set_pyx.pxd
diff options
context:
space:
mode:
Diffstat (limited to 'bzrlib/_simple_set_pyx.pxd')
-rw-r--r--bzrlib/_simple_set_pyx.pxd91
1 files changed, 91 insertions, 0 deletions
diff --git a/bzrlib/_simple_set_pyx.pxd b/bzrlib/_simple_set_pyx.pxd
new file mode 100644
index 0000000..fc81894
--- /dev/null
+++ b/bzrlib/_simple_set_pyx.pxd
@@ -0,0 +1,91 @@
+# Copyright (C) 2009, 2010 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Interface definition of a class like PySet but without caching the hash.
+
+This is generally useful when you want to 'intern' objects, etc. Note that this
+differs from Set in that we:
+ 1) Don't have all of the .intersection, .difference, etc functions
+ 2) Do return the object from the set via queries
+ eg. SimpleSet.add(key) => saved_key and SimpleSet[key] => saved_key
+"""
+
+cdef extern from "Python.h":
+ ctypedef struct PyObject:
+ pass
+
+
+cdef public api class SimpleSet [object SimpleSetObject, type SimpleSet_Type]:
+ """A class similar to PySet, but with simpler implementation.
+
+ The main advantage is that this class uses only 2N memory to store N
+ objects rather than 4N memory. The main trade-off is that we do not cache
+ the hash value of saved objects. As such, it is assumed that computing the
+ hash will be cheap (such as strings or tuples of strings, etc.)
+
+ This also differs in that you can get back the objects that are stored
+ (like a dict), but we also don't implement the complete list of 'set'
+ operations (difference, intersection, etc).
+ """
+ # Data structure definition:
+ # This is a basic hash table using open addressing.
+ # http://en.wikipedia.org/wiki/Open_addressing
+ # Basically that means we keep an array of pointers to Python objects
+ # (called a table). Each location in the array is called a 'slot'.
+ #
+ # An empty slot holds a NULL pointer, a slot where there was an item
+ # which was then deleted will hold a pointer to _dummy, and a filled slot
+ # points at the actual object which fills that slot.
+ #
+ # The table is always a power of two, and the default location where an
+ # object is inserted is at hash(object) & (table_size - 1)
+ #
+ # If there is a collision, then we search for another location. The
+ # specific algorithm is in _lookup. We search until we:
+ # find the object
+ # find an equivalent object (by tp_richcompare(obj1, obj2, Py_EQ))
+ # find a NULL slot
+ #
+ # When an object is deleted, we set its slot to _dummy. this way we don't
+ # have to track whether there was a collision, and find the corresponding
+ # keys. (The collision resolution algorithm makes that nearly impossible
+ # anyway, because it depends on the upper bits of the hash.)
+ # The main effect of this, is that if we find _dummy, then we can insert
+ # an object there, but we have to keep searching until we find NULL to
+ # know that the object is not present elsewhere.
+
+ cdef Py_ssize_t _used # active
+ cdef Py_ssize_t _fill # active + dummy
+ cdef Py_ssize_t _mask # Table contains (mask+1) slots, a power of 2
+ cdef PyObject **_table # Pyrex/Cython doesn't support arrays to 'object'
+ # so we manage it manually
+
+ cdef PyObject *_get(self, object key) except? NULL
+ cdef object _add(self, key)
+ cdef int _discard(self, key) except -1
+ cdef int _insert_clean(self, PyObject *key) except -1
+ cdef Py_ssize_t _resize(self, Py_ssize_t min_unused) except -1
+
+
+# TODO: might want to export the C api here, though it is all available from
+# the class object...
+cdef api SimpleSet SimpleSet_New()
+cdef api object SimpleSet_Add(object self, object key)
+cdef api int SimpleSet_Contains(object self, object key) except -1
+cdef api int SimpleSet_Discard(object self, object key) except -1
+cdef api PyObject *SimpleSet_Get(SimpleSet self, object key) except? NULL
+cdef api Py_ssize_t SimpleSet_Size(object self) except -1
+cdef api int SimpleSet_Next(object self, Py_ssize_t *pos, PyObject **key) except -1