summaryrefslogtreecommitdiff
path: root/Objects/setobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/setobject.c')
-rw-r--r--Objects/setobject.c625
1 files changed, 346 insertions, 279 deletions
diff --git a/Objects/setobject.c b/Objects/setobject.c
index 4ef692db33..fdb9d3600d 100644
--- a/Objects/setobject.c
+++ b/Objects/setobject.c
@@ -26,7 +26,6 @@
#include "Python.h"
#include "structmember.h"
-#include "stringlib/eq.h"
/* Object used as dummy key to fill deleted entries */
static PyObject _dummy_struct;
@@ -48,19 +47,20 @@ static PyObject _dummy_struct;
static setentry *
set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- setentry *table = so->table;
- setentry *freeslot = NULL;
+ setentry *table;
setentry *entry;
- size_t perturb = hash;
+ size_t perturb;
size_t mask = so->mask;
size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
size_t j;
int cmp;
- entry = &table[i];
+ entry = &so->table[i];
if (entry->key == NULL)
return entry;
+ perturb = hash;
+
while (1) {
if (entry->hash == hash) {
PyObject *startkey = entry->key;
@@ -70,8 +70,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
- && unicode_eq(startkey, key))
+ && _PyUnicode_EQ(startkey, key))
return entry;
+ table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@@ -83,14 +84,12 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
mask = so->mask; /* help avoid a register spill */
}
- if (entry->hash == -1 && freeslot == NULL)
- freeslot = entry;
if (i + LINEAR_PROBES <= mask) {
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
- if (entry->key == NULL)
- goto found_null;
+ if (entry->hash == 0 && entry->key == NULL)
+ return entry;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
@@ -98,8 +97,9 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
- && unicode_eq(startkey, key))
+ && _PyUnicode_EQ(startkey, key))
return entry;
+ table = so->table;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@@ -111,7 +111,104 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
mask = so->mask;
}
- if (entry->hash == -1 && freeslot == NULL)
+ }
+ }
+
+ perturb >>= PERTURB_SHIFT;
+ i = (i * 5 + 1 + perturb) & mask;
+
+ entry = &so->table[i];
+ if (entry->key == NULL)
+ return entry;
+ }
+}
+
+static int set_table_resize(PySetObject *, Py_ssize_t);
+
+static int
+set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+ setentry *table;
+ setentry *freeslot;
+ setentry *entry;
+ size_t perturb;
+ size_t mask;
+ size_t i; /* Unsigned for defined overflow behavior */
+ size_t j;
+ int cmp;
+
+ /* Pre-increment is necessary to prevent arbitrary code in the rich
+ comparison from deallocating the key just before the insertion. */
+ Py_INCREF(key);
+
+ restart:
+
+ mask = so->mask;
+ i = (size_t)hash & mask;
+
+ entry = &so->table[i];
+ if (entry->key == NULL)
+ goto found_unused;
+
+ freeslot = NULL;
+ perturb = hash;
+
+ while (1) {
+ if (entry->hash == hash) {
+ PyObject *startkey = entry->key;
+ /* startkey cannot be a dummy because the dummy hash field is -1 */
+ assert(startkey != dummy);
+ if (startkey == key)
+ goto found_active;
+ if (PyUnicode_CheckExact(startkey)
+ && PyUnicode_CheckExact(key)
+ && _PyUnicode_EQ(startkey, key))
+ goto found_active;
+ table = so->table;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp > 0) /* likely */
+ goto found_active;
+ if (cmp < 0)
+ goto comparison_error;
+ /* Continuing the search from the current entry only makes
+ sense if the table and entry are unchanged; otherwise,
+ we have to restart from the beginning */
+ if (table != so->table || entry->key != startkey)
+ goto restart;
+ mask = so->mask; /* help avoid a register spill */
+ }
+ else if (entry->hash == -1 && freeslot == NULL)
+ freeslot = entry;
+
+ if (i + LINEAR_PROBES <= mask) {
+ for (j = 0 ; j < LINEAR_PROBES ; j++) {
+ entry++;
+ if (entry->hash == 0 && entry->key == NULL)
+ goto found_unused_or_dummy;
+ if (entry->hash == hash) {
+ PyObject *startkey = entry->key;
+ assert(startkey != dummy);
+ if (startkey == key)
+ goto found_active;
+ if (PyUnicode_CheckExact(startkey)
+ && PyUnicode_CheckExact(key)
+ && _PyUnicode_EQ(startkey, key))
+ goto found_active;
+ table = so->table;
+ Py_INCREF(startkey);
+ cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
+ Py_DECREF(startkey);
+ if (cmp > 0)
+ goto found_active;
+ if (cmp < 0)
+ goto comparison_error;
+ if (table != so->table || entry->key != startkey)
+ goto restart;
+ mask = so->mask;
+ }
+ else if (entry->hash == -1 && freeslot == NULL)
freeslot = entry;
}
}
@@ -119,29 +216,51 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
perturb >>= PERTURB_SHIFT;
i = (i * 5 + 1 + perturb) & mask;
- entry = &table[i];
+ entry = &so->table[i];
if (entry->key == NULL)
- goto found_null;
+ goto found_unused_or_dummy;
}
- found_null:
- return freeslot == NULL ? entry : freeslot;
+
+ found_unused_or_dummy:
+ if (freeslot == NULL)
+ goto found_unused;
+ so->used++;
+ freeslot->key = key;
+ freeslot->hash = hash;
+ return 0;
+
+ found_unused:
+ so->fill++;
+ so->used++;
+ entry->key = key;
+ entry->hash = hash;
+ if ((size_t)so->fill*3 < mask*2)
+ return 0;
+ return set_table_resize(so, so->used);
+
+ found_active:
+ Py_DECREF(key);
+ return 0;
+
+ comparison_error:
+ Py_DECREF(key);
+ return -1;
}
/*
Internal routine used by set_table_resize() to insert an item which is
known to be absent from the set. This routine also assumes that
the set contains no deleted entries. Besides the performance benefit,
-using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
-Note that no refcounts are changed by this routine; if needed, the caller
-is responsible for incref'ing `key`.
+there is also safety benefit since using set_add_entry() risks making
+a callback in the middle of a set_table_resize(), see issue 1456209.
+The caller is responsible for updating the key's reference count and
+the setobject's fill and used fields.
*/
static void
-set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
+set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
{
- setentry *table = so->table;
setentry *entry;
size_t perturb = hash;
- size_t mask = (size_t)so->mask;
size_t i = (size_t)hash & mask;
size_t j;
@@ -162,45 +281,11 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
found_null:
entry->key = key;
entry->hash = hash;
- so->fill++;
- so->used++;
}
/* ======== End logic for probing the hash table ========================== */
/* ======================================================================== */
-
-/*
-Internal routine to insert a new key into the table.
-Used by the public insert routine.
-Eats a reference to key.
-*/
-static int
-set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
-{
- setentry *entry;
-
- entry = set_lookkey(so, key, hash);
- if (entry == NULL)
- return -1;
- if (entry->key == NULL) {
- /* UNUSED */
- entry->key = key;
- entry->hash = hash;
- so->fill++;
- so->used++;
- } else if (entry->key == dummy) {
- /* DUMMY */
- entry->key = key;
- entry->hash = hash;
- so->used++;
- } else {
- /* ACTIVE */
- Py_DECREF(key);
- }
- return 0;
-}
-
/*
Restructure the table by allocating a new table and reinserting all
keys again. When entries have been deleted, the new table may
@@ -213,10 +298,13 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
setentry *oldtable, *newtable, *entry;
Py_ssize_t oldfill = so->fill;
Py_ssize_t oldused = so->used;
+ Py_ssize_t oldmask = so->mask;
+ size_t newmask;
int is_oldtable_malloced;
setentry small_copy[PySet_MINSIZE];
assert(minused >= 0);
+ minused = (minused > 50000) ? minused * 2 : minused * 4;
/* Find the smallest table size > minused. */
/* XXX speed-up with intrinsics */
@@ -264,25 +352,24 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
/* Make the set empty, using the new table. */
assert(newtable != oldtable);
memset(newtable, 0, sizeof(setentry) * newsize);
- so->fill = 0;
- so->used = 0;
+ so->fill = oldused;
+ so->used = oldused;
so->mask = newsize - 1;
so->table = newtable;
/* Copy the data over; this is refcount-neutral for active entries;
dummy entries aren't copied over, of course */
+ newmask = (size_t)so->mask;
if (oldfill == oldused) {
- for (entry = oldtable; oldused > 0; entry++) {
+ for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
if (entry->key != NULL) {
- oldused--;
- set_insert_clean(so, entry->key, entry->hash);
+ set_insert_clean(newtable, newmask, entry->key, entry->hash);
}
}
} else {
- for (entry = oldtable; oldused > 0; entry++) {
+ for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
if (entry->key != NULL && entry->key != dummy) {
- oldused--;
- set_insert_clean(so, entry->key, entry->hash);
+ set_insert_clean(newtable, newmask, entry->key, entry->hash);
}
}
}
@@ -292,31 +379,42 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
return 0;
}
-/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
+static int
+set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
+{
+ setentry *entry;
+
+ entry = set_lookkey(so, key, hash);
+ if (entry != NULL)
+ return entry->key != NULL;
+ return -1;
+}
+
+#define DISCARD_NOTFOUND 0
+#define DISCARD_FOUND 1
static int
-set_add_entry(PySetObject *so, setentry *entry)
+set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
{
- Py_ssize_t n_used;
- PyObject *key = entry->key;
- Py_hash_t hash = entry->hash;
+ setentry *entry;
+ PyObject *old_key;
- assert(so->fill <= so->mask); /* at least one empty slot */
- n_used = so->used;
- Py_INCREF(key);
- if (set_insert_key(so, key, hash)) {
- Py_DECREF(key);
+ entry = set_lookkey(so, key, hash);
+ if (entry == NULL)
return -1;
- }
- if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
- return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ if (entry->key == NULL)
+ return DISCARD_NOTFOUND;
+ old_key = entry->key;
+ entry->key = dummy;
+ entry->hash = -1;
+ so->used--;
+ Py_DECREF(old_key);
+ return DISCARD_FOUND;
}
static int
set_add_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) ||
@@ -325,50 +423,35 @@ set_add_key(PySetObject *so, PyObject *key)
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_add_entry(so, &entry);
+ return set_add_entry(so, key, hash);
}
-#define DISCARD_NOTFOUND 0
-#define DISCARD_FOUND 1
-
static int
-set_discard_entry(PySetObject *so, setentry *oldentry)
+set_contains_key(PySetObject *so, PyObject *key)
{
- setentry *entry;
- PyObject *old_key;
+ Py_hash_t hash;
- entry = set_lookkey(so, oldentry->key, oldentry->hash);
- if (entry == NULL)
- return -1;
- if (entry->key == NULL || entry->key == dummy)
- return DISCARD_NOTFOUND;
- old_key = entry->key;
- entry->key = dummy;
- entry->hash = -1;
- so->used--;
- Py_DECREF(old_key);
- return DISCARD_FOUND;
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyASCIIObject *) key)->hash) == -1) {
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ return -1;
+ }
+ return set_contains_entry(so, key, hash);
}
static int
set_discard_key(PySetObject *so, PyObject *key)
{
- setentry entry;
Py_hash_t hash;
- assert (PyAnySet_Check(so));
-
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
}
- entry.key = key;
- entry.hash = hash;
- return set_discard_entry(so, &entry);
+ return set_discard_entry(so, key, hash);
}
static void
@@ -449,20 +532,22 @@ set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
{
Py_ssize_t i;
Py_ssize_t mask;
- setentry *table;
+ setentry *entry;
assert (PyAnySet_Check(so));
i = *pos_ptr;
assert(i >= 0);
- table = so->table;
mask = so->mask;
- while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
+ entry = &so->table[i];
+ while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
i++;
+ entry++;
+ }
*pos_ptr = i+1;
if (i > mask)
return 0;
- assert(table[i].key != NULL);
- *entry_ptr = &table[i];
+ assert(entry != NULL);
+ *entry_ptr = entry;
return 1;
}
@@ -560,8 +645,8 @@ set_merge(PySetObject *so, PyObject *otherset)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
- if (set_table_resize(so, (so->used + other->used)*2) != 0)
+ if ((so->fill + other->used)*3 >= so->mask*2) {
+ if (set_table_resize(so, so->used + other->used) != 0)
return -1;
}
so_entry = so->table;
@@ -586,11 +671,15 @@ set_merge(PySetObject *so, PyObject *otherset)
/* If our table is empty, we can use set_insert_clean() */
if (so->fill == 0) {
- for (i = 0; i <= other->mask; i++, other_entry++) {
+ setentry *newtable = so->table;
+ size_t newmask = (size_t)so->mask;
+ so->fill = other->used;
+ so->used = other->used;
+ for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
key = other_entry->key;
if (key != NULL && key != dummy) {
Py_INCREF(key);
- set_insert_clean(so, key, other_entry->hash);
+ set_insert_clean(newtable, newmask, key, other_entry->hash);
}
}
return 0;
@@ -601,46 +690,13 @@ set_merge(PySetObject *so, PyObject *otherset)
other_entry = &other->table[i];
key = other_entry->key;
if (key != NULL && key != dummy) {
- Py_INCREF(key);
- if (set_insert_key(so, key, other_entry->hash)) {
- Py_DECREF(key);
+ if (set_add_entry(so, key, other_entry->hash))
return -1;
- }
}
}
return 0;
}
-static int
-set_contains_entry(PySetObject *so, setentry *entry)
-{
- PyObject *key;
- setentry *lu_entry;
-
- lu_entry = set_lookkey(so, entry->key, entry->hash);
- if (lu_entry == NULL)
- return -1;
- key = lu_entry->key;
- return key != NULL && key != dummy;
-}
-
-static int
-set_contains_key(PySetObject *so, PyObject *key)
-{
- setentry entry;
- Py_hash_t hash;
-
- if (!PyUnicode_CheckExact(key) ||
- (hash = ((PyASCIIObject *) key)->hash) == -1) {
- hash = PyObject_Hash(key);
- if (hash == -1)
- return -1;
- }
- entry.key = key;
- entry.hash = hash;
- return set_contains_entry(so, &entry);
-}
-
static PyObject *
set_pop(PySetObject *so)
{
@@ -682,43 +738,64 @@ set_traverse(PySetObject *so, visitproc visit, void *arg)
return 0;
}
-static Py_hash_t
-frozenset_hash(PyObject *self)
+/* Work to increase the bit dispersion for closely spaced hash values.
+ This is important because some use cases have many combinations of a
+ small number of elements with nearby hashes so that many distinct
+ combinations collapse to only a handful of distinct hash values. */
+
+static Py_uhash_t
+_shuffle_bits(Py_uhash_t h)
{
- /* Most of the constants in this hash algorithm are randomly choosen
- large primes with "interesting bit patterns" and that passed
- tests for good collision statistics on a variety of problematic
- datasets such as:
+ return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
+}
- ps = []
- for r in range(21):
- ps += itertools.combinations(range(20), r)
- num_distinct_hashes = len({hash(frozenset(s)) for s in ps})
+/* Most of the constants in this hash algorithm are randomly chosen
+ large primes with "interesting bit patterns" and that passed tests
+ for good collision statistics on a variety of problematic datasets
+ including powersets and graph structures (such as David Eppstein's
+ graph recipes in Lib/test/test_set.py) */
- */
+static Py_hash_t
+frozenset_hash(PyObject *self)
+{
PySetObject *so = (PySetObject *)self;
- Py_uhash_t h, hash = 1927868237UL;
+ Py_uhash_t hash = 0;
setentry *entry;
- Py_ssize_t pos = 0;
if (so->hash != -1)
return so->hash;
- hash *= (Py_uhash_t)PySet_GET_SIZE(self) + 1;
- while (set_next(so, &pos, &entry)) {
- /* Work to increase the bit dispersion for closely spaced hash
- values. This is important because some use cases have many
- combinations of a small number of elements with nearby
- hashes so that many distinct combinations collapse to only
- a handful of distinct hash values. */
- h = entry->hash;
- hash ^= ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
- }
- /* Make the final result spread-out in a different pattern
- than the algorithm for tuples or other python objects. */
+ /* Xor-in shuffled bits from every entry's hash field because xor is
+ commutative and a frozenset hash should be independent of order.
+
+ For speed, include null entries and dummy entries and then
+ subtract out their effect afterwards so that the final hash
+ depends only on active entries. This allows the code to be
+ vectorized by the compiler and it saves the unpredictable
+ branches that would arise when trying to exclude null and dummy
+ entries on every iteration. */
+
+ for (entry = so->table; entry <= &so->table[so->mask]; entry++)
+ hash ^= _shuffle_bits(entry->hash);
+
+ /* Remove the effect of an odd number of NULL entries */
+ if ((so->mask + 1 - so->fill) & 1)
+ hash ^= _shuffle_bits(0);
+
+ /* Remove the effect of an odd number of dummy entries */
+ if ((so->fill - so->used) & 1)
+ hash ^= _shuffle_bits(-1);
+
+ /* Factor in the number of active entries */
+ hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
+
+ /* Disperse patterns arising in nested frozensets */
hash = hash * 69069U + 907133923UL;
+
+ /* -1 is reserved as an error code */
if (hash == (Py_uhash_t)-1)
hash = 590923713UL;
+
so->hash = hash;
return hash;
}
@@ -865,7 +942,7 @@ PyTypeObject PySetIter_Type = {
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)setiter_traverse, /* tp_traverse */
0, /* tp_clear */
@@ -910,18 +987,14 @@ set_update_internal(PySetObject *so, PyObject *other)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
- if (dictsize == -1)
+ if (dictsize < 0)
return -1;
- if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
- if (set_table_resize(so, (so->used + dictsize)*2) != 0)
+ if ((so->fill + dictsize)*3 >= so->mask*2) {
+ if (set_table_resize(so, so->used + dictsize) != 0)
return -1;
}
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
- an_entry.hash = hash;
- an_entry.key = key;
- if (set_add_entry(so, &an_entry))
+ if (set_add_entry(so, key, hash))
return -1;
}
return 0;
@@ -970,9 +1043,8 @@ PyDoc_STRVAR(update_doc,
static PyObject *
make_new_set(PyTypeObject *type, PyObject *iterable)
{
- PySetObject *so = NULL;
+ PySetObject *so;
- /* create PySetObject structure */
so = (PySetObject *)type->tp_alloc(type, 0);
if (so == NULL)
return NULL;
@@ -1015,7 +1087,8 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *iterable = NULL, *result;
- if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
+ if (kwds != NULL && type == &PyFrozenSet_Type
+ && !_PyArg_NoKeywords("frozenset()", kwds))
return NULL;
if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
@@ -1042,24 +1115,9 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return emptyfrozenset;
}
-int
-PySet_ClearFreeList(void)
-{
- return 0;
-}
-
-void
-PySet_Fini(void)
-{
- Py_CLEAR(emptyfrozenset);
-}
-
static PyObject *
set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
- return NULL;
-
return make_new_set(type, NULL);
}
@@ -1201,6 +1259,8 @@ set_intersection(PySetObject *so, PyObject *other)
{
PySetObject *result;
PyObject *key, *it, *tmp;
+ Py_hash_t hash;
+ int rv;
if ((PyObject *)so == other)
return set_copy(so);
@@ -1220,13 +1280,15 @@ set_intersection(PySetObject *so, PyObject *other)
}
while (set_next((PySetObject *)other, &pos, &entry)) {
- int rv = set_contains_entry(so, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_contains_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (rv) {
- if (set_add_entry(result, entry)) {
+ if (set_add_entry(result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1242,32 +1304,15 @@ set_intersection(PySetObject *so, PyObject *other)
}
while ((key = PyIter_Next(it)) != NULL) {
- int rv;
- setentry entry;
- Py_hash_t hash = PyObject_Hash(key);
-
- if (hash == -1) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
- entry.hash = hash;
- entry.key = key;
- rv = set_contains_entry(so, &entry);
- if (rv == -1) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ goto error;
+ rv = set_contains_entry(so, key, hash);
+ if (rv < 0)
+ goto error;
if (rv) {
- if (set_add_entry(result, &entry)) {
- Py_DECREF(it);
- Py_DECREF(result);
- Py_DECREF(key);
- return NULL;
- }
+ if (set_add_entry(result, key, hash))
+ goto error;
}
Py_DECREF(key);
}
@@ -1277,6 +1322,11 @@ set_intersection(PySetObject *so, PyObject *other)
return NULL;
}
return (PyObject *)result;
+ error:
+ Py_DECREF(it);
+ Py_DECREF(result);
+ Py_DECREF(key);
+ return NULL;
}
static PyObject *
@@ -1363,6 +1413,7 @@ static PyObject *
set_isdisjoint(PySetObject *so, PyObject *other)
{
PyObject *key, *it, *tmp;
+ int rv;
if ((PyObject *)so == other) {
if (PySet_GET_SIZE(so) == 0)
@@ -1381,8 +1432,8 @@ set_isdisjoint(PySetObject *so, PyObject *other)
other = tmp;
}
while (set_next((PySetObject *)other, &pos, &entry)) {
- int rv = set_contains_entry(so, entry);
- if (rv == -1)
+ rv = set_contains_entry(so, entry->key, entry->hash);
+ if (rv < 0)
return NULL;
if (rv)
Py_RETURN_FALSE;
@@ -1395,8 +1446,6 @@ set_isdisjoint(PySetObject *so, PyObject *other)
return NULL;
while ((key = PyIter_Next(it)) != NULL) {
- int rv;
- setentry entry;
Py_hash_t hash = PyObject_Hash(key);
if (hash == -1) {
@@ -1404,11 +1453,9 @@ set_isdisjoint(PySetObject *so, PyObject *other)
Py_DECREF(it);
return NULL;
}
- entry.hash = hash;
- entry.key = key;
- rv = set_contains_entry(so, &entry);
+ rv = set_contains_entry(so, key, hash);
Py_DECREF(key);
- if (rv == -1) {
+ if (rv < 0) {
Py_DECREF(it);
return NULL;
}
@@ -1429,6 +1476,10 @@ PyDoc_STRVAR(isdisjoint_doc,
static int
set_difference_update_internal(PySetObject *so, PyObject *other)
{
+ if (PySet_GET_SIZE(so) == 0) {
+ return 0;
+ }
+
if ((PyObject *)so == other)
return set_clear_internal(so);
@@ -1437,7 +1488,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
Py_ssize_t pos = 0;
while (set_next((PySetObject *)other, &pos, &entry))
- if (set_discard_entry(so, entry) == -1)
+ if (set_discard_entry(so, entry->key, entry->hash) < 0)
return -1;
} else {
PyObject *key, *it;
@@ -1446,7 +1497,7 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
return -1;
while ((key = PyIter_Next(it)) != NULL) {
- if (set_discard_key(so, key) == -1) {
+ if (set_discard_key(so, key) < 0) {
Py_DECREF(it);
Py_DECREF(key);
return -1;
@@ -1457,10 +1508,10 @@ set_difference_update_internal(PySetObject *so, PyObject *other)
if (PyErr_Occurred())
return -1;
}
- /* If more than 1/5 are dummies, then resize them away. */
- if ((so->fill - so->used) * 5 < so->mask)
+ /* If more than 1/4th are dummies, then resize them away. */
+ if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
return 0;
- return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
+ return set_table_resize(so, so->used);
}
static PyObject *
@@ -1487,7 +1538,7 @@ set_copy_and_difference(PySetObject *so, PyObject *other)
result = set_copy(so);
if (result == NULL)
return NULL;
- if (set_difference_update_internal((PySetObject *) result, other) != -1)
+ if (set_difference_update_internal((PySetObject *) result, other) == 0)
return result;
Py_DECREF(result);
return NULL;
@@ -1497,8 +1548,15 @@ static PyObject *
set_difference(PySetObject *so, PyObject *other)
{
PyObject *result;
+ PyObject *key;
+ Py_hash_t hash;
setentry *entry;
Py_ssize_t pos = 0;
+ int rv;
+
+ if (PySet_GET_SIZE(so) == 0) {
+ return set_copy(so);
+ }
if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
return set_copy_and_difference(so, other);
@@ -1516,17 +1574,15 @@ set_difference(PySetObject *so, PyObject *other)
if (PyDict_CheckExact(other)) {
while (set_next(so, &pos, &entry)) {
- setentry entrycopy;
- int rv;
- entrycopy.hash = entry->hash;
- entrycopy.key = entry->key;
- rv = _PyDict_Contains(other, entry->key, entry->hash);
+ key = entry->key;
+ hash = entry->hash;
+ rv = _PyDict_Contains(other, key, hash);
if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, &entrycopy)) {
+ if (set_add_entry((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1537,13 +1593,15 @@ set_difference(PySetObject *so, PyObject *other)
/* Iterate over so, checking for common elements in other. */
while (set_next(so, &pos, &entry)) {
- int rv = set_contains_entry((PySetObject *)other, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_contains_entry((PySetObject *)other, key, hash);
+ if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
- if (set_add_entry((PySetObject *)result, entry)) {
+ if (set_add_entry((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}
@@ -1605,29 +1663,24 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
PySetObject *otherset;
PyObject *key;
Py_ssize_t pos = 0;
+ Py_hash_t hash;
setentry *entry;
+ int rv;
if ((PyObject *)so == other)
return set_clear(so);
if (PyDict_CheckExact(other)) {
PyObject *value;
- int rv;
- Py_hash_t hash;
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
- setentry an_entry;
-
Py_INCREF(key);
- an_entry.hash = hash;
- an_entry.key = key;
-
- rv = set_discard_entry(so, &an_entry);
- if (rv == -1) {
+ rv = set_discard_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(key);
return NULL;
}
if (rv == DISCARD_NOTFOUND) {
- if (set_add_entry(so, &an_entry)) {
+ if (set_add_entry(so, key, hash)) {
Py_DECREF(key);
return NULL;
}
@@ -1647,13 +1700,15 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other)
}
while (set_next(otherset, &pos, &entry)) {
- int rv = set_discard_entry(so, entry);
- if (rv == -1) {
+ key = entry->key;
+ hash = entry->hash;
+ rv = set_discard_entry(so, key, hash);
+ if (rv < 0) {
Py_DECREF(otherset);
return NULL;
}
if (rv == DISCARD_NOTFOUND) {
- if (set_add_entry(so, entry)) {
+ if (set_add_entry(so, key, hash)) {
Py_DECREF(otherset);
return NULL;
}
@@ -1715,6 +1770,7 @@ set_issubset(PySetObject *so, PyObject *other)
{
setentry *entry;
Py_ssize_t pos = 0;
+ int rv;
if (!PyAnySet_Check(other)) {
PyObject *tmp, *result;
@@ -1729,8 +1785,8 @@ set_issubset(PySetObject *so, PyObject *other)
Py_RETURN_FALSE;
while (set_next(so, &pos, &entry)) {
- int rv = set_contains_entry((PySetObject *)other, entry);
- if (rv == -1)
+ rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
+ if (rv < 0)
return NULL;
if (!rv)
Py_RETURN_FALSE;
@@ -1821,7 +1877,7 @@ set_contains(PySetObject *so, PyObject *key)
int rv;
rv = set_contains_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return -1;
PyErr_Clear();
@@ -1840,7 +1896,7 @@ set_direct_contains(PySetObject *so, PyObject *key)
long result;
result = set_contains(so, key);
- if (result == -1)
+ if (result < 0)
return NULL;
return PyBool_FromLong(result);
}
@@ -1854,7 +1910,7 @@ set_remove(PySetObject *so, PyObject *key)
int rv;
rv = set_discard_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
@@ -1863,7 +1919,7 @@ set_remove(PySetObject *so, PyObject *key)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
- if (rv == -1)
+ if (rv < 0)
return NULL;
}
@@ -1886,7 +1942,7 @@ set_discard(PySetObject *so, PyObject *key)
int rv;
rv = set_discard_key(so, key);
- if (rv == -1) {
+ if (rv < 0) {
if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
return NULL;
PyErr_Clear();
@@ -1895,7 +1951,7 @@ set_discard(PySetObject *so, PyObject *key)
return NULL;
rv = set_discard_key(so, tmpkey);
Py_DECREF(tmpkey);
- if (rv == -1)
+ if (rv < 0)
return NULL;
}
Py_RETURN_NONE;
@@ -1949,13 +2005,12 @@ set_init(PySetObject *self, PyObject *args, PyObject *kwds)
{
PyObject *iterable = NULL;
- if (!PyAnySet_Check(self))
- return -1;
- if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
+ if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds))
return -1;
if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
return -1;
- set_clear_internal(self);
+ if (self->fill)
+ set_clear_internal(self);
self->hash = -1;
if (iterable == NULL)
return 0;
@@ -2122,7 +2177,7 @@ static PyMethodDef frozenset_methods[] = {
copy_doc},
{"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
difference_doc},
- {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
+ {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
intersection_doc},
{"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
isdisjoint_doc},
@@ -2193,7 +2248,7 @@ PyTypeObject PyFrozenSet_Type = {
(traverseproc)set_traverse, /* tp_traverse */
(inquiry)set_clear_internal, /* tp_clear */
(richcmpfunc)set_richcompare, /* tp_richcompare */
- offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
+ offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
(getiterfunc)set_iter, /* tp_iter */
0, /* tp_iternext */
frozenset_methods, /* tp_methods */
@@ -2277,6 +2332,18 @@ PySet_Add(PyObject *anyset, PyObject *key)
}
int
+PySet_ClearFreeList(void)
+{
+ return 0;
+}
+
+void
+PySet_Fini(void)
+{
+ Py_CLEAR(emptyfrozenset);
+}
+
+int
_PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
{
setentry *entry;
@@ -2399,7 +2466,7 @@ test_c_api(PySetObject *so)
/* Exercise direct iteration */
i = 0, count = 0;
while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
- s = _PyUnicode_AsString(x);
+ s = PyUnicode_AsUTF8(x);
assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
count++;
}