diff options
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r-- | Objects/setobject.c | 516 |
1 files changed, 254 insertions, 262 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c index ea5a24c516..adc99da627 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1,154 +1,121 @@ /* set object implementation + Written and maintained by Raymond D. Hettinger <python@rcn.com> Derived from Lib/sets.py and Objects/dictobject.c. - Copyright (c) 2003-2008 Python Software Foundation. + Copyright (c) 2003-2013 Python Software Foundation. All rights reserved. -*/ -#include "Python.h" -#include "structmember.h" -#include "stringlib/eq.h" + The basic lookup function used by all operations. + This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. -/* Set a key error with the specified argument, wrapping it in a - * tuple automatically so that tuple keys are not unpacked as the - * exception arguments. */ -static void -set_key_error(PyObject *arg) -{ - PyObject *tup; - tup = PyTuple_Pack(1, arg); - if (!tup) - return; /* caller will expect error to be set anyway */ - PyErr_SetObject(PyExc_KeyError, tup); - Py_DECREF(tup); -} + The initial probe index is computed as hash mod the table size. + Subsequent probe indices are computed as explained in Objects/dictobject.c. -/* This must be >= 1. */ -#define PERTURB_SHIFT 5 + To improve cache locality, each probe inspects a series of consecutive + nearby entries before moving on to probes elsewhere in memory. This leaves + us with a hybrid of linear probing and open addressing. The linear probing + reduces the cost of hash collisions because consecutive memory accesses + tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, + we then use open addressing with the upper bits from the hash value. This + helps break-up long chains of collisions. -/* Object used as dummy key to fill deleted entries */ -static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */ + All arithmetic on hash should ignore overflow. -#ifdef Py_REF_DEBUG -PyObject * -_PySet_Dummy(void) -{ - return dummy; -} -#endif + Unlike the dictionary implementation, the lookkey functions can return + NULL if the rich comparison returns an error. +*/ -#define INIT_NONZERO_SET_SLOTS(so) do { \ - (so)->table = (so)->smalltable; \ - (so)->mask = PySet_MINSIZE - 1; \ - (so)->hash = -1; \ - } while(0) +#include "Python.h" +#include "structmember.h" +#include "stringlib/eq.h" -#define EMPTY_TO_MINSIZE(so) do { \ - memset((so)->smalltable, 0, sizeof((so)->smalltable)); \ - (so)->used = (so)->fill = 0; \ - INIT_NONZERO_SET_SLOTS(so); \ - } while(0) +/* Object used as dummy key to fill deleted entries */ +static PyObject _dummy_struct; -/* Reuse scheme to save calls to malloc, free, and memset */ -#ifndef PySet_MAXFREELIST -#define PySet_MAXFREELIST 80 -#endif -static PySetObject *free_list[PySet_MAXFREELIST]; -static int numfree = 0; +#define dummy (&_dummy_struct) +/* Exported for the gdb plugin's benefit. */ +PyObject *_PySet_Dummy = dummy; -/* -The basic lookup function used by all operations. -This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. -Open addressing is preferred over chaining since the link overhead for -chaining would be substantial (100% with typical malloc overhead). -The initial probe index is computed as hash mod the table size. Subsequent -probe indices are computed as explained in Objects/dictobject.c. +/* ======================================================================== */ +/* ======= Begin logic for probing the hash table ========================= */ -All arithmetic on hash should ignore overflow. +/* Set this to zero to turn-off linear probing */ +#ifndef LINEAR_PROBES +#define LINEAR_PROBES 9 +#endif -Unlike the dictionary implementation, the lookkey functions can return -NULL if the rich comparison returns an error. -*/ +/* This must be >= 1 */ +#define PERTURB_SHIFT 5 static setentry * -set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) +set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) { - register size_t i; /* Unsigned for defined overflow behavior. */ - register size_t perturb; - register setentry *freeslot; - register size_t mask = so->mask; setentry *table = so->table; - register setentry *entry; - register int cmp; - PyObject *startkey; - - i = (size_t)hash & mask; - entry = &table[i]; - if (entry->key == NULL || entry->key == key) + setentry *freeslot = NULL; + setentry *entry; + size_t perturb = hash; + size_t mask = so->mask; + size_t i = (size_t)hash; /* Unsigned for defined overflow behavior. */ + size_t j; + int cmp; + + entry = &table[i & mask]; + if (entry->key == NULL) return entry; - if (entry->key == dummy) - freeslot = entry; - else { - if (entry->hash == hash) { - startkey = entry->key; + while (1) { + if (entry->key == key) + return entry; + if (entry->hash == hash && entry->key != dummy) { + PyObject *startkey = entry->key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); if (cmp < 0) return NULL; - if (table == so->table && entry->key == startkey) { - if (cmp > 0) - return entry; - } - else { - /* The compare did major nasty stuff to the - * set: start over. - */ + if (table != so->table || entry->key != startkey) return set_lookkey(so, key, hash); - } + if (cmp > 0) + return entry; } - freeslot = NULL; - } + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; - /* In the loop, key == dummy is by far (factor of 100s) the - least likely outcome, so test for that last. */ - for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; - entry = &table[i & mask]; - if (entry->key == NULL) { - if (freeslot != NULL) - entry = freeslot; - break; - } - if (entry->key == key) - break; - if (entry->hash == hash && entry->key != dummy) { - startkey = entry->key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) - return NULL; - if (table == so->table && entry->key == startkey) { + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; + if (entry->key == NULL) + goto found_null; + if (entry->key == key) + return entry; + if (entry->hash == hash && entry->key != dummy) { + PyObject *startkey = entry->key; + Py_INCREF(startkey); + cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) + return NULL; + if (table != so->table || entry->key != startkey) + return set_lookkey(so, key, hash); if (cmp > 0) - break; - } - else { - /* The compare did major nasty stuff to the - * set: start over. - */ - return set_lookkey(so, key, hash); + return entry; } + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; } - else if (entry->key == dummy && freeslot == NULL) - freeslot = entry; + + perturb >>= PERTURB_SHIFT; + i = i * 5 + 1 + perturb; + + entry = &table[i & mask]; + if (entry->key == NULL) + goto found_null; } - return entry; + found_null: + return freeslot == NULL ? entry : freeslot; } /* @@ -157,14 +124,15 @@ set_lookkey(PySetObject *so, PyObject *key, register Py_hash_t hash) * see if the comparison altered the table. */ static setentry * -set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) +set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash) { - register size_t i; /* Unsigned for defined overflow behavior. */ - register size_t perturb; - register setentry *freeslot; - register size_t mask = so->mask; setentry *table = so->table; - register setentry *entry; + setentry *freeslot = NULL; + setentry *entry; + size_t perturb = hash; + size_t mask = so->mask; + size_t i = (size_t)hash; + size_t j; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -174,46 +142,94 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, register Py_hash_t hash) so->lookup = set_lookkey; return set_lookkey(so, key, hash); } - i = (size_t)hash & mask; - entry = &table[i]; - if (entry->key == NULL || entry->key == key) + + entry = &table[i & mask]; + if (entry->key == NULL) return entry; - if (entry->key == dummy) - freeslot = entry; - else { - if (entry->hash == hash && unicode_eq(entry->key, key)) - return entry; - freeslot = NULL; - } - /* In the loop, key == dummy is by far (factor of 100s) the - least likely outcome, so test for that last. */ - for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; - entry = &table[i & mask]; - if (entry->key == NULL) - return freeslot == NULL ? entry : freeslot; + while (1) { if (entry->key == key || (entry->hash == hash - && entry->key != dummy - && unicode_eq(entry->key, key))) + && entry->key != dummy + && unicode_eq(entry->key, key))) return entry; if (entry->key == dummy && freeslot == NULL) freeslot = entry; + + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; + if (entry->key == NULL) + goto found_null; + if (entry->key == key + || (entry->hash == hash + && entry->key != dummy + && unicode_eq(entry->key, key))) + return entry; + if (entry->key == dummy && freeslot == NULL) + freeslot = entry; + } + + perturb >>= PERTURB_SHIFT; + i = i * 5 + 1 + perturb; + + entry = &table[i & mask]; + if (entry->key == NULL) + goto found_null; } - assert(0); /* NOT REACHED */ - return 0; + found_null: + return freeslot == NULL ? entry : freeslot; +} + +/* +Internal routine used by set_table_resize() to insert an item which is +known to be absent from the set. This routine also assumes that +the set contains no deleted entries. Besides the performance benefit, +using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209). +Note that no refcounts are changed by this routine; if needed, the caller +is responsible for incref'ing `key`. +*/ +static void +set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash) +{ + setentry *table = so->table; + setentry *entry; + size_t perturb = hash; + size_t mask = (size_t)so->mask; + size_t i = (size_t)hash; + size_t j; + + while (1) { + entry = &table[i & mask]; + if (entry->key == NULL) + goto found_null; + for (j = 1 ; j <= LINEAR_PROBES ; j++) { + entry = &table[(i + j) & mask]; + if (entry->key == NULL) + goto found_null; + } + perturb >>= PERTURB_SHIFT; + i = i * 5 + 1 + perturb; + } + found_null: + entry->key = key; + entry->hash = hash; + so->fill++; + so->used++; } +/* ======== End logic for probing the hash table ========================== */ +/* ======================================================================== */ + + /* Internal routine to insert a new key into the table. Used by the public insert routine. Eats a reference to key. */ static int -set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash) +set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash) { - register setentry *entry; + setentry *entry; assert(so->lookup != NULL); entry = so->lookup(so, key, hash); @@ -230,7 +246,6 @@ set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash) entry->key = key; entry->hash = hash; so->used++; - Py_DECREF(dummy); } else { /* ACTIVE */ Py_DECREF(key); @@ -239,35 +254,6 @@ set_insert_key(register PySetObject *so, PyObject *key, Py_hash_t hash) } /* -Internal routine used by set_table_resize() to insert an item which is -known to be absent from the set. This routine also assumes that -the set contains no deleted entries. Besides the performance benefit, -using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209). -Note that no refcounts are changed by this routine; if needed, the caller -is responsible for incref'ing `key`. -*/ -static void -set_insert_clean(register PySetObject *so, PyObject *key, Py_hash_t hash) -{ - register size_t i; - register size_t perturb; - register size_t mask = (size_t)so->mask; - setentry *table = so->table; - register setentry *entry; - - i = (size_t)hash & mask; - entry = &table[i]; - for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) { - i = (i << 2) + i + perturb + 1; - entry = &table[i & mask]; - } - so->fill++; - entry->key = key; - entry->hash = hash; - so->used++; -} - -/* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may actually be smaller than the old one. @@ -330,23 +316,14 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) so->table = newtable; so->mask = newsize - 1; memset(newtable, 0, sizeof(setentry) * newsize); + i = so->used; so->used = 0; - i = so->fill; so->fill = 0; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ for (entry = oldtable; i > 0; entry++) { - if (entry->key == NULL) { - /* UNUSED */ - ; - } else if (entry->key == dummy) { - /* DUMMY */ - --i; - assert(entry->key == dummy); - Py_DECREF(entry->key); - } else { - /* ACTIVE */ + if (entry->key != NULL && entry->key != dummy) { --i; set_insert_clean(so, entry->key, entry->hash); } @@ -360,9 +337,9 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */ static int -set_add_entry(register PySetObject *so, setentry *entry) +set_add_entry(PySetObject *so, setentry *entry) { - register Py_ssize_t n_used; + Py_ssize_t n_used; PyObject *key = entry->key; Py_hash_t hash = entry->hash; @@ -379,10 +356,10 @@ set_add_entry(register PySetObject *so, setentry *entry) } static int -set_add_key(register PySetObject *so, PyObject *key) +set_add_key(PySetObject *so, PyObject *key) { - register Py_hash_t hash; - register Py_ssize_t n_used; + Py_hash_t hash; + Py_ssize_t n_used; if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -407,7 +384,7 @@ set_add_key(register PySetObject *so, PyObject *key) static int set_discard_entry(PySetObject *so, setentry *oldentry) -{ register setentry *entry; +{ setentry *entry; PyObject *old_key; entry = (so->lookup)(so, oldentry->key, oldentry->hash); @@ -416,7 +393,6 @@ set_discard_entry(PySetObject *so, setentry *oldentry) if (entry->key == NULL || entry->key == dummy) return DISCARD_NOTFOUND; old_key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; Py_DECREF(old_key); @@ -426,8 +402,8 @@ set_discard_entry(PySetObject *so, setentry *oldentry) static int set_discard_key(PySetObject *so, PyObject *key) { - register Py_hash_t hash; - register setentry *entry; + Py_hash_t hash; + setentry *entry; PyObject *old_key; assert (PyAnySet_Check(so)); @@ -444,13 +420,23 @@ set_discard_key(PySetObject *so, PyObject *key) if (entry->key == NULL || entry->key == dummy) return DISCARD_NOTFOUND; old_key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; Py_DECREF(old_key); return DISCARD_FOUND; } +static void +set_empty_to_minsize(PySetObject *so) +{ + memset(so->smalltable, 0, sizeof(so->smalltable)); + so->fill = 0; + so->used = 0; + so->mask = PySet_MINSIZE - 1; + so->table = so->smalltable; + so->hash = -1; +} + static int set_clear_internal(PySetObject *so) { @@ -458,14 +444,13 @@ set_clear_internal(PySetObject *so) int table_is_malloced; Py_ssize_t fill; setentry small_copy[PySet_MINSIZE]; -#ifdef Py_DEBUG - Py_ssize_t i, n; - assert (PyAnySet_Check(so)); - n = so->mask + 1; - i = 0; +#ifdef Py_DEBUG + Py_ssize_t i = 0; + Py_ssize_t n = so->mask + 1; #endif + assert (PyAnySet_Check(so)); table = so->table; assert(table != NULL); table_is_malloced = table != so->smalltable; @@ -478,7 +463,7 @@ set_clear_internal(PySetObject *so) */ fill = so->fill; if (table_is_malloced) - EMPTY_TO_MINSIZE(so); + set_empty_to_minsize(so); else if (fill > 0) { /* It's a small table with something that needs to be cleared. @@ -487,7 +472,7 @@ set_clear_internal(PySetObject *so) */ memcpy(small_copy, table, sizeof(small_copy)); table = small_copy; - EMPTY_TO_MINSIZE(so); + set_empty_to_minsize(so); } /* else it's a small table that's already empty */ @@ -502,7 +487,8 @@ set_clear_internal(PySetObject *so) #endif if (entry->key) { --fill; - Py_DECREF(entry->key); + if (entry->key != dummy) + Py_DECREF(entry->key); } #ifdef Py_DEBUG else @@ -533,7 +519,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) { Py_ssize_t i; Py_ssize_t mask; - register setentry *table; + setentry *table; assert (PyAnySet_Check(so)); i = *pos_ptr; @@ -553,7 +539,7 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) static void set_dealloc(PySetObject *so) { - register setentry *entry; + setentry *entry; Py_ssize_t fill = so->fill; PyObject_GC_UnTrack(so); Py_TRASHCAN_SAFE_BEGIN(so) @@ -563,15 +549,13 @@ set_dealloc(PySetObject *so) for (entry = so->table; fill > 0; entry++) { if (entry->key) { --fill; - Py_DECREF(entry->key); + if (entry->key != dummy) + Py_DECREF(entry->key); } } if (so->table != so->smalltable) PyMem_DEL(so->table); - if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so)) - free_list[numfree++] = so; - else - Py_TYPE(so)->tp_free(so); + Py_TYPE(so)->tp_free(so); Py_TRASHCAN_SAFE_END(so) } @@ -632,8 +616,8 @@ set_merge(PySetObject *so, PyObject *otherset) PySetObject *other; PyObject *key; Py_hash_t hash; - register Py_ssize_t i; - register setentry *entry; + Py_ssize_t i; + setentry *entry; assert (PyAnySet_Check(so)); assert (PyAnySet_Check(otherset)); @@ -701,8 +685,8 @@ set_contains_entry(PySetObject *so, setentry *entry) static PyObject * set_pop(PySetObject *so) { - register Py_ssize_t i = 0; - register setentry *entry; + Py_ssize_t i = 0; + setentry *entry; PyObject *key; assert (PyAnySet_Check(so)); @@ -734,7 +718,6 @@ set_pop(PySetObject *so) } } key = entry->key; - Py_INCREF(dummy); entry->key = dummy; so->used--; so->table[0].hash = i + 1; /* next place to start */ @@ -869,8 +852,8 @@ static PyMethodDef setiter_methods[] = { static PyObject *setiter_iternext(setiterobject *si) { PyObject *key; - register Py_ssize_t i, mask; - register setentry *entry; + Py_ssize_t i, mask; + setentry *entry; PySetObject *so = si->si_set; if (so == NULL) @@ -1024,33 +1007,19 @@ PyDoc_STRVAR(update_doc, static PyObject * make_new_set(PyTypeObject *type, PyObject *iterable) { - register PySetObject *so = NULL; - - if (dummy == NULL) { /* Auto-initialize dummy */ - dummy = PyUnicode_FromString("<dummy key>"); - if (dummy == NULL) - return NULL; - } + PySetObject *so = NULL; /* create PySetObject structure */ - if (numfree && - (type == &PySet_Type || type == &PyFrozenSet_Type)) { - so = free_list[--numfree]; - assert (so != NULL && PyAnySet_CheckExact(so)); - Py_TYPE(so) = type; - _Py_NewReference((PyObject *)so); - EMPTY_TO_MINSIZE(so); - PyObject_GC_Track(so); - } else { - so = (PySetObject *)type->tp_alloc(type, 0); - if (so == NULL) - return NULL; - /* tp_alloc has already zeroed the structure */ - assert(so->table == NULL && so->fill == 0 && so->used == 0); - INIT_NONZERO_SET_SLOTS(so); - } + so = (PySetObject *)type->tp_alloc(type, 0); + if (so == NULL) + return NULL; + so->fill = 0; + so->used = 0; + so->mask = PySet_MINSIZE - 1; + so->table = so->smalltable; so->lookup = set_lookkey_unicode; + so->hash = -1; so->weakreflist = NULL; if (iterable != NULL) { @@ -1113,35 +1082,15 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) int PySet_ClearFreeList(void) { - int freelist_size = numfree; - PySetObject *so; - - while (numfree) { - numfree--; - so = free_list[numfree]; - PyObject_GC_Del(so); - } - return freelist_size; + return 0; } void PySet_Fini(void) { - PySet_ClearFreeList(); - Py_CLEAR(dummy); Py_CLEAR(emptyfrozenset); } -/* Print summary info about the state of the optimized allocator */ -void -_PySet_DebugMallocStats(FILE *out) -{ - _PyDebugAllocatorStats(out, - "free PySetObject", - numfree, sizeof(PySetObject)); -} - - static PyObject * set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { @@ -1952,7 +1901,7 @@ set_remove(PySetObject *so, PyObject *key) } if (rv == DISCARD_NOTFOUND) { - set_key_error(key); + _PyErr_SetKeyError(key); return NULL; } Py_RETURN_NONE; @@ -2414,7 +2363,7 @@ test_c_api(PySetObject *so) Py_ssize_t count; char *s; Py_ssize_t i; - PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x; + PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL; PyObject *ob = (PyObject *)so; Py_hash_t hash; PyObject *str; @@ -2537,3 +2486,46 @@ test_c_api(PySetObject *so) #undef assertRaises #endif + +/***** Dummy Struct *************************************************/ + +static PyObject * +dummy_repr(PyObject *op) +{ + return PyUnicode_FromString("<dummy key>"); +} + +static void +dummy_dealloc(PyObject* ignore) +{ + Py_FatalError("deallocating <dummy key>"); +} + +static PyTypeObject _PySetDummy_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "<dummy key> type", + 0, + 0, + dummy_dealloc, /*tp_dealloc*/ /*never called*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + dummy_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /*tp_flags */ +}; + +static PyObject _dummy_struct = { + _PyObject_EXTRA_INIT + 2, &_PySetDummy_Type +}; + |