summaryrefslogtreecommitdiff
path: root/Objects/dictobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/dictobject.c')
-rw-r--r--Objects/dictobject.c1868
1 files changed, 1157 insertions, 711 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 11c086ffb4..a7b403bcec 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1,4 +1,3 @@
-
/* Dictionary object implementation using a hash table */
/* The distribution includes a separate file, Objects/dictnotes.txt,
@@ -7,68 +6,112 @@
tuning dictionaries, and several ideas for possible optimizations.
*/
+/* PyDictKeysObject
-/*
-There are four kinds of slots in the table:
+This implements the dictionary's hashtable.
-1. Unused. me_key == me_value == NULL
- Does not hold an active (key, value) pair now and never did. Unused can
- transition to Active upon key insertion. This is the only case in which
- me_key is NULL, and is each slot's initial state.
+As of Python 3.6, this is compact and ordered. Basic idea is described here.
+https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
-2. Active. me_key != NULL and me_key != dummy and me_value != NULL
- Holds an active (key, value) pair. Active can transition to Dummy or
- Pending upon key deletion (for combined and split tables respectively).
- This is the only case in which me_value != NULL.
+layout:
-3. Dummy. me_key == dummy and me_value == NULL
- Previously held an active (key, value) pair, but that was deleted and an
- active pair has not yet overwritten the slot. Dummy can transition to
- Active upon key insertion. Dummy slots cannot be made Unused again
- (cannot have me_key set to NULL), else the probe sequence in case of
- collision would have no way to know they were once active.
++---------------+
+| dk_refcnt |
+| dk_size |
+| dk_lookup |
+| dk_usable |
+| dk_nentries |
++---------------+
+| dk_indices |
+| |
++---------------+
+| dk_entries |
+| |
++---------------+
-4. Pending. Not yet inserted or deleted from a split-table.
- key != NULL, key != dummy and value == NULL
+dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
+or DKIX_DUMMY(-2).
+Size of indices is dk_size. Type of each index in indices is vary on dk_size:
+* int8 for dk_size <= 128
+* int16 for 256 <= dk_size <= 2**15
+* int32 for 2**16 <= dk_size <= 2**31
+* int64 for 2**32 <= dk_size
+
+dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size).
+DK_ENTRIES(dk) can be used to get pointer to entries.
+
+NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
+dk_indices entry is signed integer and int16 is used for table which
+dk_size == 256.
+*/
+
+
+/*
The DictObject can be in one of two forms.
+
Either:
A combined table:
ma_values == NULL, dk_refcnt == 1.
Values are stored in the me_value field of the PyDictKeysObject.
- Slot kind 4 is not allowed i.e.
- key != NULL, key != dummy and value == NULL is illegal.
Or:
A split table:
ma_values != NULL, dk_refcnt >= 1
Values are stored in the ma_values array.
- Only string (unicode) keys are allowed, no <dummy> keys are present.
+ Only string (unicode) keys are allowed.
+ All dicts sharing same key must have same insertion order.
+
+There are four kinds of slots in the table (slot is index, and
+DK_ENTRIES(keys)[index] if index >= 0):
+
+1. Unused. index == DKIX_EMPTY
+ Does not hold an active (key, value) pair now and never did. Unused can
+ transition to Active upon key insertion. This is each slot's initial state.
+
+2. Active. index >= 0, me_key != NULL and me_value != NULL
+ Holds an active (key, value) pair. Active can transition to Dummy or
+ Pending upon key deletion (for combined and split tables respectively).
+ This is the only case in which me_value != NULL.
+
+3. Dummy. index == DKIX_DUMMY (combined only)
+ Previously held an active (key, value) pair, but that was deleted and an
+ active pair has not yet overwritten the slot. Dummy can transition to
+ Active upon key insertion. Dummy slots cannot be made Unused again
+ else the probe sequence in case of collision would have no way to know
+ they were once active.
-Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
-hold a search finger. The me_hash field of Unused or Dummy slots has no
-meaning otherwise. As a consequence of this popitem always converts the dict
-to the combined-table form.
+4. Pending. index >= 0, key != NULL, and value == NULL (split only)
+ Not yet inserted in split-table.
*/
-/* PyDict_MINSIZE_SPLIT is the minimum size of a split dictionary.
- * It must be a power of 2, and at least 4.
- * Resizing of split dictionaries is very rare, so the saving memory is more
- * important than the cost of resizing.
- */
-#define PyDict_MINSIZE_SPLIT 4
+/*
+Preserving insertion order
+
+It's simple for combined table. Since dk_entries is mostly append only, we can
+get insertion order by just iterating dk_entries.
-/* PyDict_MINSIZE_COMBINED is the starting size for any new, non-split dict.
+One exception is .popitem(). It removes last item in dk_entries and decrement
+dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
+dk_indices, we can't increment dk_usable even though dk_nentries is
+decremented.
+
+In split table, inserting into pending entry is allowed only for dk_entries[ix]
+where ix == mp->ma_used. Inserting into other index and deleting item cause
+converting the dict to the combined table.
+*/
+
+/* PyDict_MINSIZE is the starting size for any new dict.
* 8 allows dicts with no more than 5 active entries; experiments suggested
* this suffices for the majority of dicts (consisting mostly of usually-small
* dicts created to pass keyword arguments).
* Making this 8, rather than 4 reduces the number of resizes for most
* dictionaries, without any significant extra memory use.
*/
-#define PyDict_MINSIZE_COMBINED 8
+#define PyDict_MINSIZE 8
#include "Python.h"
#include "dict-common.h"
-#include "stringlib/eq.h"
+#include "stringlib/eq.h" /* to get unicode_eq() */
/*[clinic input]
class dict "PyDictObject *" "&PyDict_Type"
@@ -141,8 +184,8 @@ The other half of the strategy is to get the other bits of the hash code
into play. This is done by initializing a (unsigned) vrbl "perturb" to the
full hash code, and changing the recurrence to:
- j = (5*j) + 1 + perturb;
perturb >>= PERTURB_SHIFT;
+ j = (5*j) + 1 + perturb;
use j % 2**i as the next table index;
Now the probe sequence depends (eventually) on every bit in the hash code,
@@ -177,41 +220,38 @@ equally good collision statistics, needed less code & used less memory.
*/
-/* Object used as dummy key to fill deleted entries
- * This could be any unique object,
- * use a custom type in order to minimise coupling.
-*/
-static PyObject _dummy_struct;
-
-#define dummy (&_dummy_struct)
-
-#ifdef Py_REF_DEBUG
-PyObject *
-_PyDict_Dummy(void)
-{
- return dummy;
-}
-#endif
-
/* forward declarations */
-static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *
+static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
+ Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
+ Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos);
+static Py_ssize_t
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr);
+ Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
+ Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos);
static int dictresize(PyDictObject *mp, Py_ssize_t minused);
-/* Dictionary reuse scheme to save calls to malloc, free, and memset */
+/*Global counter used to set ma_version_tag field of dictionary.
+ * It is incremented each time that a dictionary is created and each
+ * time that a dictionary is modified. */
+static uint64_t pydict_global_version = 0;
+
+#define DICT_NEXT_VERSION() (++pydict_global_version)
+
+/* Dictionary reuse scheme to save calls to malloc and free */
#ifndef PyDict_MAXFREELIST
#define PyDict_MAXFREELIST 80
#endif
static PyDictObject *free_list[PyDict_MAXFREELIST];
static int numfree = 0;
+static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
+static int numfreekeys = 0;
#include "clinic/dictobject.c.h"
@@ -219,12 +259,15 @@ int
PyDict_ClearFreeList(void)
{
PyDictObject *op;
- int ret = numfree;
+ int ret = numfree + numfreekeys;
while (numfree) {
op = free_list[--numfree];
assert(PyDict_CheckExact(op));
PyObject_GC_Del(op);
}
+ while (numfreekeys) {
+ PyObject_FREE(keys_free_list[--numfreekeys]);
+ }
return ret;
}
@@ -243,40 +286,116 @@ PyDict_Fini(void)
PyDict_ClearFreeList();
}
+#define DK_SIZE(dk) ((dk)->dk_size)
+#if SIZEOF_VOID_P > 4
+#define DK_IXSIZE(dk) \
+ (DK_SIZE(dk) <= 0xff ? \
+ 1 : DK_SIZE(dk) <= 0xffff ? \
+ 2 : DK_SIZE(dk) <= 0xffffffff ? \
+ 4 : sizeof(int64_t))
+#else
+#define DK_IXSIZE(dk) \
+ (DK_SIZE(dk) <= 0xff ? \
+ 1 : DK_SIZE(dk) <= 0xffff ? \
+ 2 : sizeof(int32_t))
+#endif
+#define DK_ENTRIES(dk) \
+ ((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)]))
+
#define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
#define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
#define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt)
#define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk)
-#define DK_SIZE(dk) ((dk)->dk_size)
#define DK_MASK(dk) (((dk)->dk_size)-1)
#define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
+/* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
+static inline Py_ssize_t
+dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
+{
+ Py_ssize_t s = DK_SIZE(keys);
+ Py_ssize_t ix;
+
+ if (s <= 0xff) {
+ int8_t *indices = keys->dk_indices.as_1;
+ ix = indices[i];
+ }
+ else if (s <= 0xffff) {
+ int16_t *indices = keys->dk_indices.as_2;
+ ix = indices[i];
+ }
+#if SIZEOF_VOID_P > 4
+ else if (s > 0xffffffff) {
+ int64_t *indices = keys->dk_indices.as_8;
+ ix = indices[i];
+ }
+#endif
+ else {
+ int32_t *indices = keys->dk_indices.as_4;
+ ix = indices[i];
+ }
+ assert(ix >= DKIX_DUMMY);
+ return ix;
+}
+
+/* write to indices. */
+static inline void
+dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
+{
+ Py_ssize_t s = DK_SIZE(keys);
+
+ assert(ix >= DKIX_DUMMY);
+
+ if (s <= 0xff) {
+ int8_t *indices = keys->dk_indices.as_1;
+ assert(ix <= 0x7f);
+ indices[i] = (char)ix;
+ }
+ else if (s <= 0xffff) {
+ int16_t *indices = keys->dk_indices.as_2;
+ assert(ix <= 0x7fff);
+ indices[i] = (int16_t)ix;
+ }
+#if SIZEOF_VOID_P > 4
+ else if (s > 0xffffffff) {
+ int64_t *indices = keys->dk_indices.as_8;
+ indices[i] = ix;
+ }
+#endif
+ else {
+ int32_t *indices = keys->dk_indices.as_4;
+ assert(ix <= 0x7fffffff);
+ indices[i] = (int32_t)ix;
+ }
+}
+
+
/* USABLE_FRACTION is the maximum dictionary load.
- * Currently set to (2n+1)/3. Increasing this ratio makes dictionaries more
- * dense resulting in more collisions. Decreasing it improves sparseness
- * at the expense of spreading entries over more cache lines and at the
- * cost of total memory consumed.
+ * Increasing this ratio makes dictionaries more dense resulting in more
+ * collisions. Decreasing it improves sparseness at the expense of spreading
+ * indices over more cache lines and at the cost of total memory consumed.
*
* USABLE_FRACTION must obey the following:
* (0 < USABLE_FRACTION(n) < n) for all n >= 2
*
- * USABLE_FRACTION should be very quick to calculate.
- * Fractions around 5/8 to 2/3 seem to work well in practice.
+ * USABLE_FRACTION should be quick to calculate.
+ * Fractions around 1/2 to 2/3 seem to work well in practice.
*/
+#define USABLE_FRACTION(n) (((n) << 1)/3)
-/* Use (2n+1)/3 rather than 2n+3 because: it makes no difference for
- * combined tables (the two fractions round to the same number n < ),
- * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite
- * a lot of space for small, split tables */
-#define USABLE_FRACTION(n) ((((n) << 1)+1)/3)
+/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
+ * This can be used to reserve enough size to insert n entries without
+ * resizing.
+ */
+#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
-/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make
+/* Alternative fraction that is otherwise close enough to 2n/3 to make
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
* 32 * 2/3 = 21, 32 * 5/8 = 20.
* Its advantage is that it is faster to compute on machines with slow division.
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
-*/
+ */
/* GROWTH_RATE. Growth rate upon hitting maximum load.
* Currently set to used*2 + capacity/2.
@@ -300,61 +419,155 @@ PyDict_Fini(void)
* (which cannot fail and thus can do no allocation).
*/
static PyDictKeysObject empty_keys_struct = {
- 2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */
+ 1, /* dk_refcnt */
1, /* dk_size */
lookdict_split, /* dk_lookup */
0, /* dk_usable (immutable) */
- {
- { 0, 0, 0 } /* dk_entries (empty) */
- }
+ 0, /* dk_nentries */
+ .dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
+ DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}},
};
static PyObject *empty_values[1] = { NULL };
#define Py_EMPTY_KEYS &empty_keys_struct
+/* Uncomment to check the dict content in _PyDict_CheckConsistency() */
+/* #define DEBUG_PYDICT */
+
+
+#ifdef Py_DEBUG
+static int
+_PyDict_CheckConsistency(PyDictObject *mp)
+{
+ PyDictKeysObject *keys = mp->ma_keys;
+ int splitted = _PyDict_HasSplitTable(mp);
+ Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
+#ifdef DEBUG_PYDICT
+ PyDictKeyEntry *entries = DK_ENTRIES(keys);
+ Py_ssize_t i;
+#endif
+
+ assert(0 <= mp->ma_used && mp->ma_used <= usable);
+ assert(IS_POWER_OF_2(keys->dk_size));
+ assert(0 <= keys->dk_usable
+ && keys->dk_usable <= usable);
+ assert(0 <= keys->dk_nentries
+ && keys->dk_nentries <= usable);
+ assert(keys->dk_usable + keys->dk_nentries <= usable);
+
+ if (!splitted) {
+ /* combined table */
+ assert(keys->dk_refcnt == 1);
+ }
+
+#ifdef DEBUG_PYDICT
+ for (i=0; i < keys->dk_size; i++) {
+ Py_ssize_t ix = dk_get_index(keys, i);
+ assert(DKIX_DUMMY <= ix && ix <= usable);
+ }
+
+ for (i=0; i < usable; i++) {
+ PyDictKeyEntry *entry = &entries[i];
+ PyObject *key = entry->me_key;
+
+ if (key != NULL) {
+ if (PyUnicode_CheckExact(key)) {
+ Py_hash_t hash = ((PyASCIIObject *)key)->hash;
+ assert(hash != -1);
+ assert(entry->me_hash == hash);
+ }
+ else {
+ /* test_dict fails if PyObject_Hash() is called again */
+ assert(entry->me_hash != -1);
+ }
+ if (!splitted) {
+ assert(entry->me_value != NULL);
+ }
+ }
+
+ if (splitted) {
+ assert(entry->me_value == NULL);
+ }
+ }
+
+ if (splitted) {
+ /* splitted table */
+ for (i=0; i < mp->ma_used; i++) {
+ assert(mp->ma_values[i] != NULL);
+ }
+ }
+#endif
+
+ return 1;
+}
+#endif
+
+
static PyDictKeysObject *new_keys_object(Py_ssize_t size)
{
PyDictKeysObject *dk;
- Py_ssize_t i;
- PyDictKeyEntry *ep0;
+ Py_ssize_t es, usable;
- assert(size >= PyDict_MINSIZE_SPLIT);
+ assert(size >= PyDict_MINSIZE);
assert(IS_POWER_OF_2(size));
- dk = PyMem_MALLOC(sizeof(PyDictKeysObject) +
- sizeof(PyDictKeyEntry) * (size-1));
- if (dk == NULL) {
- PyErr_NoMemory();
- return NULL;
+
+ usable = USABLE_FRACTION(size);
+ if (size <= 0xff) {
+ es = 1;
+ }
+ else if (size <= 0xffff) {
+ es = 2;
+ }
+#if SIZEOF_VOID_P > 4
+ else if (size <= 0xffffffff) {
+ es = 4;
+ }
+#endif
+ else {
+ es = sizeof(Py_ssize_t);
+ }
+
+ if (size == PyDict_MINSIZE && numfreekeys > 0) {
+ dk = keys_free_list[--numfreekeys];
+ }
+ else {
+ dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
+ - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ + es * size
+ + sizeof(PyDictKeyEntry) * usable);
+ if (dk == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
}
DK_DEBUG_INCREF dk->dk_refcnt = 1;
dk->dk_size = size;
- dk->dk_usable = USABLE_FRACTION(size);
- ep0 = &dk->dk_entries[0];
- /* Hash value of slot 0 is used by popitem, so it must be initialized */
- ep0->me_hash = 0;
- for (i = 0; i < size; i++) {
- ep0[i].me_key = NULL;
- ep0[i].me_value = NULL;
- }
+ dk->dk_usable = usable;
dk->dk_lookup = lookdict_unicode_nodummy;
+ dk->dk_nentries = 0;
+ memset(&dk->dk_indices.as_1[0], 0xff, es * size);
+ memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
return dk;
}
static void
free_keys_object(PyDictKeysObject *keys)
{
- PyDictKeyEntry *entries = &keys->dk_entries[0];
+ PyDictKeyEntry *entries = DK_ENTRIES(keys);
Py_ssize_t i, n;
- for (i = 0, n = DK_SIZE(keys); i < n; i++) {
+ for (i = 0, n = keys->dk_nentries; i < n; i++) {
Py_XDECREF(entries[i].me_key);
Py_XDECREF(entries[i].me_value);
}
- PyMem_FREE(keys);
+ if (keys->dk_size == PyDict_MINSIZE && numfreekeys < PyDict_MAXFREELIST) {
+ keys_free_list[numfreekeys++] = keys;
+ return;
+ }
+ PyObject_FREE(keys);
}
#define new_values(size) PyMem_NEW(PyObject *, size)
-
#define free_values(values) PyMem_FREE(values)
/* Consumes a reference to the keys object */
@@ -380,6 +593,8 @@ new_dict(PyDictKeysObject *keys, PyObject **values)
mp->ma_keys = keys;
mp->ma_values = values;
mp->ma_used = 0;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ assert(_PyDict_CheckConsistency(mp));
return (PyObject *)mp;
}
@@ -390,7 +605,7 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
PyObject **values;
Py_ssize_t i, size;
- size = DK_SIZE(keys);
+ size = USABLE_FRACTION(DK_SIZE(keys));
values = new_values(size);
if (values == NULL) {
DK_DECREF(keys);
@@ -405,12 +620,44 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
PyObject *
PyDict_New(void)
{
- PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+ PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
if (keys == NULL)
return NULL;
return new_dict(keys, NULL);
}
+/* Search index of hash table from offset of entry table */
+static Py_ssize_t
+lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
+{
+ size_t i;
+ size_t mask = DK_MASK(k);
+ Py_ssize_t ix;
+
+ i = (size_t)hash & mask;
+ ix = dk_get_index(k, i);
+ if (ix == index) {
+ return i;
+ }
+ if (ix == DKIX_EMPTY) {
+ return DKIX_EMPTY;
+ }
+
+ for (size_t perturb = hash;;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
+ ix = dk_get_index(k, i);
+ if (ix == index) {
+ return i;
+ }
+ if (ix == DKIX_EMPTY) {
+ return DKIX_EMPTY;
+ }
+ }
+ assert(0); /* NOT REACHED */
+ return DKIX_ERROR;
+}
+
/*
The basic lookup function used by all operations.
This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -426,52 +673,66 @@ The details in this version are due to Tim Peters, building on many past
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
Christian Tismer.
-lookdict() is general-purpose, and may return NULL if (and only if) a
-comparison raises an exception (this was new in Python 2.5).
+lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
+comparison raises an exception.
lookdict_unicode() below is specialized to string keys, comparison of which can
-never raise an exception; that function can never return NULL.
+never raise an exception; that function can never return DKIX_ERROR.
lookdict_unicode_nodummy is further specialized for string keys that cannot be
the <dummy> value.
-For both, when the key isn't found a PyDictEntry* is returned
-where the key would have been found, *value_addr points to the matching value
-slot.
+For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns
+where the key index should be inserted.
*/
-static PyDictKeyEntry *
+static Py_ssize_t
lookdict(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr)
+ Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
- size_t i;
- size_t perturb;
- PyDictKeyEntry *freeslot;
- size_t mask;
- PyDictKeyEntry *ep0;
- PyDictKeyEntry *ep;
+ size_t i, mask;
+ Py_ssize_t ix, freeslot;
int cmp;
+ PyDictKeysObject *dk;
+ PyDictKeyEntry *ep0, *ep;
PyObject *startkey;
top:
- mask = DK_MASK(mp->ma_keys);
- ep0 = &mp->ma_keys->dk_entries[0];
+ dk = mp->ma_keys;
+ mask = DK_MASK(dk);
+ ep0 = DK_ENTRIES(dk);
i = (size_t)hash & mask;
- ep = &ep0[i];
- if (ep->me_key == NULL || ep->me_key == key) {
- *value_addr = &ep->me_value;
- return ep;
+
+ ix = dk_get_index(dk, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ if (ix == DKIX_DUMMY) {
+ freeslot = i;
}
- if (ep->me_key == dummy)
- freeslot = ep;
else {
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL);
+ if (ep->me_key == key) {
+ *value_addr = &ep->me_value;
+ if (hashpos != NULL)
+ *hashpos = i;
+ return ix;
+ }
if (ep->me_hash == hash) {
startkey = ep->me_key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
- if (cmp < 0)
- return NULL;
- if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+ if (cmp < 0) {
+ *value_addr = NULL;
+ return DKIX_ERROR;
+ }
+ if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
*value_addr = &ep->me_value;
- return ep;
+ if (hashpos != NULL)
+ *hashpos = i;
+ return ix;
}
}
else {
@@ -479,40 +740,50 @@ top:
goto top;
}
}
- freeslot = NULL;
+ freeslot = -1;
}
- /* In the loop, me_key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
- if (ep->me_key == NULL) {
- if (freeslot == NULL) {
- *value_addr = &ep->me_value;
- return ep;
- } else {
- *value_addr = &freeslot->me_value;
- return freeslot;
+ for (size_t perturb = hash;;) {
+ perturb >>= PERTURB_SHIFT;
+ i = ((i << 2) + i + perturb + 1) & mask;
+ ix = dk_get_index(dk, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL) {
+ *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
}
+ *value_addr = NULL;
+ return ix;
}
+ if (ix == DKIX_DUMMY) {
+ if (freeslot == -1)
+ freeslot = i;
+ continue;
+ }
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL);
if (ep->me_key == key) {
+ if (hashpos != NULL) {
+ *hashpos = i;
+ }
*value_addr = &ep->me_value;
- return ep;
+ return ix;
}
- if (ep->me_hash == hash && ep->me_key != dummy) {
+ if (ep->me_hash == hash) {
startkey = ep->me_key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
*value_addr = NULL;
- return NULL;
+ return DKIX_ERROR;
}
- if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+ if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
+ if (hashpos != NULL) {
+ *hashpos = i;
+ }
*value_addr = &ep->me_value;
- return ep;
+ return ix;
}
}
else {
@@ -520,72 +791,80 @@ top:
goto top;
}
}
- else if (ep->me_key == dummy && freeslot == NULL)
- freeslot = ep;
}
assert(0); /* NOT REACHED */
return 0;
}
/* Specialized version for string-only keys */
-static PyDictKeyEntry *
+static Py_ssize_t
lookdict_unicode(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr)
+ Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
size_t i;
- size_t perturb;
- PyDictKeyEntry *freeslot;
size_t mask = DK_MASK(mp->ma_keys);
- PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
- PyDictKeyEntry *ep;
+ Py_ssize_t ix, freeslot;
+ PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+ assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
if (!PyUnicode_CheckExact(key)) {
mp->ma_keys->dk_lookup = lookdict;
- return lookdict(mp, key, hash, value_addr);
+ return lookdict(mp, key, hash, value_addr, hashpos);
}
i = (size_t)hash & mask;
- ep = &ep0[i];
- if (ep->me_key == NULL || ep->me_key == key) {
- *value_addr = &ep->me_value;
- return ep;
+ ix = dk_get_index(mp->ma_keys, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ if (ix == DKIX_DUMMY) {
+ freeslot = i;
}
- if (ep->me_key == dummy)
- freeslot = ep;
else {
- if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL);
+ if (ep->me_key == key
+ || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+ if (hashpos != NULL)
+ *hashpos = i;
*value_addr = &ep->me_value;
- return ep;
+ return ix;
}
- freeslot = NULL;
+ freeslot = -1;
}
- /* In the loop, me_key == dummy is by far (factor of 100s) the
- least likely outcome, so test for that last. */
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
- if (ep->me_key == NULL) {
- if (freeslot == NULL) {
- *value_addr = &ep->me_value;
- return ep;
- } else {
- *value_addr = &freeslot->me_value;
- return freeslot;
+ for (size_t perturb = hash;;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
+ ix = dk_get_index(mp->ma_keys, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL) {
+ *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
}
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ if (ix == DKIX_DUMMY) {
+ if (freeslot == -1)
+ freeslot = i;
+ continue;
}
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL);
if (ep->me_key == key
- || (ep->me_hash == hash
- && ep->me_key != dummy
- && unicode_eq(ep->me_key, key))) {
+ || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = &ep->me_value;
- return ep;
+ if (hashpos != NULL) {
+ *hashpos = i;
+ }
+ return ix;
}
- if (ep->me_key == dummy && freeslot == NULL)
- freeslot = ep;
}
assert(0); /* NOT REACHED */
return 0;
@@ -593,40 +872,63 @@ lookdict_unicode(PyDictObject *mp, PyObject *key,
/* Faster version of lookdict_unicode when it is known that no <dummy> keys
* will be present. */
-static PyDictKeyEntry *
+static Py_ssize_t
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr)
+ Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos)
{
size_t i;
- size_t perturb;
size_t mask = DK_MASK(mp->ma_keys);
- PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
+ PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+ assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
if (!PyUnicode_CheckExact(key)) {
mp->ma_keys->dk_lookup = lookdict;
- return lookdict(mp, key, hash, value_addr);
+ return lookdict(mp, key, hash, value_addr, hashpos);
}
i = (size_t)hash & mask;
- ep = &ep0[i];
- assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == NULL || ep->me_key == key ||
+ ix = dk_get_index(mp->ma_keys, i);
+ assert (ix != DKIX_DUMMY);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL);
+ assert(PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+ if (hashpos != NULL)
+ *hashpos = i;
*value_addr = &ep->me_value;
- return ep;
- }
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
- assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == NULL || ep->me_key == key ||
+ return ix;
+ }
+ for (size_t perturb = hash;;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
+ ix = dk_get_index(mp->ma_keys, i);
+ assert (ix != DKIX_DUMMY);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+ if (hashpos != NULL)
+ *hashpos = i;
*value_addr = &ep->me_value;
- return ep;
+ return ix;
}
}
assert(0); /* NOT REACHED */
@@ -638,39 +940,62 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
* Split tables only contain unicode keys and no dummy keys,
* so algorithm is the same as lookdict_unicode_nodummy.
*/
-static PyDictKeyEntry *
+static Py_ssize_t
lookdict_split(PyDictObject *mp, PyObject *key,
- Py_hash_t hash, PyObject ***value_addr)
+ Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
size_t i;
- size_t perturb;
size_t mask = DK_MASK(mp->ma_keys);
- PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
+ PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+ /* mp must split table */
+ assert(mp->ma_values != NULL);
if (!PyUnicode_CheckExact(key)) {
- ep = lookdict(mp, key, hash, value_addr);
- /* lookdict expects a combined-table, so fix value_addr */
- i = ep - ep0;
- *value_addr = &mp->ma_values[i];
- return ep;
+ ix = lookdict(mp, key, hash, value_addr, hashpos);
+ if (ix >= 0) {
+ *value_addr = &mp->ma_values[ix];
+ }
+ return ix;
}
+
i = (size_t)hash & mask;
- ep = &ep0[i];
- assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == NULL || ep->me_key == key ||
+ ix = dk_get_index(mp->ma_keys, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ assert(ix >= 0);
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
- *value_addr = &mp->ma_values[i];
- return ep;
- }
- for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
- assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
- if (ep->me_key == NULL || ep->me_key == key ||
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = &mp->ma_values[ix];
+ return ix;
+ }
+ for (size_t perturb = hash;;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
+ ix = dk_get_index(mp->ma_keys, i);
+ if (ix == DKIX_EMPTY) {
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = NULL;
+ return DKIX_EMPTY;
+ }
+ assert(ix >= 0);
+ ep = &ep0[ix];
+ assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+ if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
- *value_addr = &mp->ma_values[i & mask];
- return ep;
+ if (hashpos != NULL)
+ *hashpos = i;
+ *value_addr = &mp->ma_values[ix];
+ return ix;
}
}
assert(0); /* NOT REACHED */
@@ -707,27 +1032,27 @@ _PyDict_MaybeUntrack(PyObject *op)
{
PyDictObject *mp;
PyObject *value;
- Py_ssize_t i, size;
+ Py_ssize_t i, numentries;
+ PyDictKeyEntry *ep0;
if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
return;
mp = (PyDictObject *) op;
- size = DK_SIZE(mp->ma_keys);
+ ep0 = DK_ENTRIES(mp->ma_keys);
+ numentries = mp->ma_keys->dk_nentries;
if (_PyDict_HasSplitTable(mp)) {
- for (i = 0; i < size; i++) {
+ for (i = 0; i < numentries; i++) {
if ((value = mp->ma_values[i]) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value)) {
- assert(!_PyObject_GC_MAY_BE_TRACKED(
- mp->ma_keys->dk_entries[i].me_key));
+ assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
return;
}
}
}
else {
- PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
- for (i = 0; i < size; i++) {
+ for (i = 0; i < numentries; i++) {
if ((value = ep0[i].me_value) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value) ||
@@ -739,33 +1064,35 @@ _PyDict_MaybeUntrack(PyObject *op)
}
/* Internal function to find slot for an item from its hash
- * when it is known that the key is not present in the dict.
- */
-static PyDictKeyEntry *
+ when it is known that the key is not present in the dict.
+
+ The dict must be combined. */
+static void
find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
- PyObject ***value_addr)
+ PyObject ***value_addr, Py_ssize_t *hashpos)
{
size_t i;
- size_t perturb;
size_t mask = DK_MASK(mp->ma_keys);
- PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
+ PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+ assert(!_PyDict_HasSplitTable(mp));
+ assert(hashpos != NULL);
assert(key != NULL);
+
if (!PyUnicode_CheckExact(key))
mp->ma_keys->dk_lookup = lookdict;
i = hash & mask;
- ep = &ep0[i];
- for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
+ ix = dk_get_index(mp->ma_keys, i);
+ for (size_t perturb = hash; ix != DKIX_EMPTY;) {
+ perturb >>= PERTURB_SHIFT;
i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
+ ix = dk_get_index(mp->ma_keys, i & mask);
}
+ ep = &ep0[mp->ma_keys->dk_nentries];
+ *hashpos = i & mask;
assert(ep->me_value == NULL);
- if (mp->ma_values)
- *value_addr = &mp->ma_values[i & mask];
- else
- *value_addr = &ep->me_value;
- return ep;
+ *value_addr = &ep->me_value;
}
static int
@@ -784,58 +1111,88 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
{
PyObject *old_value;
PyObject **value_addr;
- PyDictKeyEntry *ep;
- assert(key != dummy);
+ PyDictKeyEntry *ep, *ep0;
+ Py_ssize_t hashpos, ix;
if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
if (insertion_resize(mp) < 0)
return -1;
}
- ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
- if (ep == NULL) {
+ ix = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr, &hashpos);
+ if (ix == DKIX_ERROR) {
return -1;
}
+
assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
Py_INCREF(value);
MAINTAIN_TRACKING(mp, key, value);
- old_value = *value_addr;
- if (old_value != NULL) {
- assert(ep->me_key != NULL && ep->me_key != dummy);
- *value_addr = value;
- Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
+
+ /* When insertion order is different from shared key, we can't share
+ * the key anymore. Convert this instance to combine table.
+ */
+ if (_PyDict_HasSplitTable(mp) &&
+ ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+ (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+ if (insertion_resize(mp) < 0) {
+ Py_DECREF(value);
+ return -1;
+ }
+ find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+ ix = DKIX_EMPTY;
}
- else {
- if (ep->me_key == NULL) {
- Py_INCREF(key);
- if (mp->ma_keys->dk_usable <= 0) {
- /* Need to resize. */
- if (insertion_resize(mp) < 0) {
- Py_DECREF(key);
- Py_DECREF(value);
- return -1;
- }
- ep = find_empty_slot(mp, key, hash, &value_addr);
+
+ if (ix == DKIX_EMPTY) {
+ /* Insert into new slot. */
+ if (mp->ma_keys->dk_usable <= 0) {
+ /* Need to resize. */
+ if (insertion_resize(mp) < 0) {
+ Py_DECREF(value);
+ return -1;
}
- mp->ma_keys->dk_usable--;
- assert(mp->ma_keys->dk_usable >= 0);
- ep->me_key = key;
- ep->me_hash = hash;
+ find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+ }
+ ep0 = DK_ENTRIES(mp->ma_keys);
+ ep = &ep0[mp->ma_keys->dk_nentries];
+ dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
+ Py_INCREF(key);
+ ep->me_key = key;
+ ep->me_hash = hash;
+ if (mp->ma_values) {
+ assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+ mp->ma_values[mp->ma_keys->dk_nentries] = value;
}
else {
- if (ep->me_key == dummy) {
- Py_INCREF(key);
- ep->me_key = key;
- ep->me_hash = hash;
- Py_DECREF(dummy);
- } else {
- assert(_PyDict_HasSplitTable(mp));
- }
+ ep->me_value = value;
}
mp->ma_used++;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ mp->ma_keys->dk_usable--;
+ mp->ma_keys->dk_nentries++;
+ assert(mp->ma_keys->dk_usable >= 0);
+ assert(_PyDict_CheckConsistency(mp));
+ return 0;
+ }
+
+ assert(value_addr != NULL);
+
+ old_value = *value_addr;
+ if (old_value != NULL) {
*value_addr = value;
- assert(ep->me_key != NULL && ep->me_key != dummy);
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ assert(_PyDict_CheckConsistency(mp));
+
+ Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
+ return 0;
}
+
+ /* pending state */
+ assert(_PyDict_HasSplitTable(mp));
+ assert(ix == mp->ma_used);
+ *value_addr = value;
+ mp->ma_used++;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ assert(_PyDict_CheckConsistency(mp));
return 0;
}
@@ -854,24 +1211,24 @@ insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
PyObject *value)
{
size_t i;
- size_t perturb;
PyDictKeysObject *k = mp->ma_keys;
size_t mask = (size_t)DK_SIZE(k)-1;
- PyDictKeyEntry *ep0 = &k->dk_entries[0];
+ PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
PyDictKeyEntry *ep;
assert(k->dk_lookup != NULL);
assert(value != NULL);
assert(key != NULL);
- assert(key != dummy);
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
i = hash & mask;
- ep = &ep0[i];
- for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
- i = (i << 2) + i + perturb + 1;
- ep = &ep0[i & mask];
+ for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
}
+ ep = &ep0[k->dk_nentries];
assert(ep->me_value == NULL);
+ dk_set_index(k, i, k->dk_nentries);
+ k->dk_nentries++;
ep->me_key = key;
ep->me_hash = hash;
ep->me_value = value;
@@ -888,16 +1245,16 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared().
*/
static int
-dictresize(PyDictObject *mp, Py_ssize_t minused)
+dictresize(PyDictObject *mp, Py_ssize_t minsize)
{
- Py_ssize_t newsize;
+ Py_ssize_t i, newsize;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
- Py_ssize_t i, oldsize;
+ PyDictKeyEntry *ep0;
-/* Find the smallest table size > minused. */
- for (newsize = PyDict_MINSIZE_COMBINED;
- newsize <= minused && newsize > 0;
+ /* Find the smallest table size > minused. */
+ for (newsize = PyDict_MINSIZE;
+ newsize < minsize && newsize > 0;
newsize <<= 1)
;
if (newsize <= 0) {
@@ -912,56 +1269,45 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
mp->ma_keys = oldkeys;
return -1;
}
+ // New table must be large enough.
+ assert(mp->ma_keys->dk_usable >= mp->ma_used);
if (oldkeys->dk_lookup == lookdict)
mp->ma_keys->dk_lookup = lookdict;
- oldsize = DK_SIZE(oldkeys);
mp->ma_values = NULL;
- /* If empty then nothing to copy so just return */
- if (oldsize == 1) {
- assert(oldkeys == Py_EMPTY_KEYS);
- DK_DECREF(oldkeys);
- return 0;
- }
+ ep0 = DK_ENTRIES(oldkeys);
/* Main loop below assumes we can transfer refcount to new keys
* and that value is stored in me_value.
* Increment ref-counts and copy values here to compensate
* This (resizing a split table) should be relatively rare */
if (oldvalues != NULL) {
- for (i = 0; i < oldsize; i++) {
+ for (i = 0; i < oldkeys->dk_nentries; i++) {
if (oldvalues[i] != NULL) {
- Py_INCREF(oldkeys->dk_entries[i].me_key);
- oldkeys->dk_entries[i].me_value = oldvalues[i];
+ Py_INCREF(ep0[i].me_key);
+ ep0[i].me_value = oldvalues[i];
}
}
}
/* Main loop */
- for (i = 0; i < oldsize; i++) {
- PyDictKeyEntry *ep = &oldkeys->dk_entries[i];
+ for (i = 0; i < oldkeys->dk_nentries; i++) {
+ PyDictKeyEntry *ep = &ep0[i];
if (ep->me_value != NULL) {
- assert(ep->me_key != dummy);
insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
}
}
mp->ma_keys->dk_usable -= mp->ma_used;
if (oldvalues != NULL) {
/* NULL out me_value slot in oldkeys, in case it was shared */
- for (i = 0; i < oldsize; i++)
- oldkeys->dk_entries[i].me_value = NULL;
- assert(oldvalues != empty_values);
- free_values(oldvalues);
+ for (i = 0; i < oldkeys->dk_nentries; i++)
+ ep0[i].me_value = NULL;
DK_DECREF(oldkeys);
+ if (oldvalues != empty_values) {
+ free_values(oldvalues);
+ }
}
else {
assert(oldkeys->dk_lookup != lookdict_split);
- if (oldkeys->dk_lookup != lookdict_unicode_nodummy) {
- PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0];
- for (i = 0; i < oldsize; i++) {
- if (ep0[i].me_key == dummy)
- Py_DECREF(dummy);
- }
- }
assert(oldkeys->dk_refcnt == 1);
- DK_DEBUG_DECREF PyMem_FREE(oldkeys);
+ DK_DEBUG_DECREF PyObject_FREE(oldkeys);
}
return 0;
}
@@ -985,16 +1331,14 @@ make_keys_shared(PyObject *op)
return NULL;
}
else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
- /* Remove dummy keys
- * -1 is required since dictresize() uses key size > minused
- */
- if (dictresize(mp, DK_SIZE(mp->ma_keys) - 1))
+ /* Remove dummy keys */
+ if (dictresize(mp, DK_SIZE(mp->ma_keys)))
return NULL;
}
assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
/* Copy values into a new array */
- ep0 = &mp->ma_keys->dk_entries[0];
- size = DK_SIZE(mp->ma_keys);
+ ep0 = DK_ENTRIES(mp->ma_keys);
+ size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
values = new_values(size);
if (values == NULL) {
PyErr_SetString(PyExc_MemoryError,
@@ -1015,12 +1359,26 @@ make_keys_shared(PyObject *op)
PyObject *
_PyDict_NewPresized(Py_ssize_t minused)
{
+ const Py_ssize_t max_presize = 128 * 1024;
Py_ssize_t newsize;
PyDictKeysObject *new_keys;
- for (newsize = PyDict_MINSIZE_COMBINED;
- newsize <= minused && newsize > 0;
- newsize <<= 1)
- ;
+
+ /* There are no strict guarantee that returned dict can contain minused
+ * items without resize. So we create medium size dict instead of very
+ * large dict or MemoryError.
+ */
+ if (minused > USABLE_FRACTION(max_presize)) {
+ newsize = max_presize;
+ }
+ else {
+ Py_ssize_t minsize = ESTIMATE_SIZE(minused);
+ newsize = PyDict_MINSIZE;
+ while (newsize < minsize) {
+ newsize <<= 1;
+ }
+ }
+ assert(IS_POWER_OF_2(newsize));
+
new_keys = new_keys_object(newsize);
if (new_keys == NULL)
return NULL;
@@ -1041,8 +1399,8 @@ PyObject *
PyDict_GetItem(PyObject *op, PyObject *key)
{
Py_hash_t hash;
+ Py_ssize_t ix;
PyDictObject *mp = (PyDictObject *)op;
- PyDictKeyEntry *ep;
PyThreadState *tstate;
PyObject **value_addr;
@@ -1068,15 +1426,15 @@ PyDict_GetItem(PyObject *op, PyObject *key)
/* preserve the existing exception */
PyObject *err_type, *err_value, *err_tb;
PyErr_Fetch(&err_type, &err_value, &err_tb);
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
/* ignore errors */
PyErr_Restore(err_type, err_value, err_tb);
- if (ep == NULL)
+ if (ix < 0)
return NULL;
}
else {
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL) {
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix < 0) {
PyErr_Clear();
return NULL;
}
@@ -1084,39 +1442,25 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return *value_addr;
}
+/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
+ This returns NULL *with* an exception set if an exception occurred.
+ It returns NULL *without* an exception set if the key wasn't present.
+*/
PyObject *
_PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
{
+ Py_ssize_t ix;
PyDictObject *mp = (PyDictObject *)op;
- PyDictKeyEntry *ep;
- PyThreadState *tstate;
PyObject **value_addr;
- if (!PyDict_Check(op))
+ if (!PyDict_Check(op)) {
+ PyErr_BadInternalCall();
return NULL;
-
- /* We can arrive here with a NULL tstate during initialization: try
- running "python -Wi" for an example related to string interning.
- Let's just hope that no exception occurs then... This must be
- _PyThreadState_Current and not PyThreadState_GET() because in debug
- mode, the latter complains if tstate is NULL. */
- tstate = _PyThreadState_UncheckedGet();
- if (tstate != NULL && tstate->curexc_type != NULL) {
- /* preserve the existing exception */
- PyObject *err_type, *err_value, *err_tb;
- PyErr_Fetch(&err_type, &err_value, &err_tb);
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- /* ignore errors */
- PyErr_Restore(err_type, err_value, err_tb);
- if (ep == NULL)
- return NULL;
}
- else {
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL) {
- PyErr_Clear();
- return NULL;
- }
+
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix < 0) {
+ return NULL;
}
return *value_addr;
}
@@ -1128,9 +1472,9 @@ _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
PyObject *
PyDict_GetItemWithError(PyObject *op, PyObject *key)
{
+ Py_ssize_t ix;
Py_hash_t hash;
PyDictObject*mp = (PyDictObject *)op;
- PyDictKeyEntry *ep;
PyObject **value_addr;
if (!PyDict_Check(op)) {
@@ -1146,8 +1490,8 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
}
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix < 0)
return NULL;
return *value_addr;
}
@@ -1162,39 +1506,40 @@ _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
return PyDict_GetItemWithError(dp, kv);
}
-/* Fast version of global value lookup.
+/* Fast version of global value lookup (LOAD_GLOBAL).
* Lookup in globals, then builtins.
+ *
+ * Raise an exception and return NULL if an error occurred (ex: computing the
+ * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
+ * exist. Return the value if the key exists.
*/
PyObject *
_PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
{
- PyObject *x;
- if (PyUnicode_CheckExact(key)) {
- PyObject **value_addr;
- Py_hash_t hash = ((PyASCIIObject *)key)->hash;
- if (hash != -1) {
- PyDictKeyEntry *e;
- e = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr);
- if (e == NULL) {
- return NULL;
- }
- x = *value_addr;
- if (x != NULL)
- return x;
- e = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr);
- if (e == NULL) {
- return NULL;
- }
- x = *value_addr;
- return x;
- }
+ Py_ssize_t ix;
+ Py_hash_t hash;
+ PyObject **value_addr;
+
+ if (!PyUnicode_CheckExact(key) ||
+ (hash = ((PyASCIIObject *) key)->hash) == -1)
+ {
+ hash = PyObject_Hash(key);
+ if (hash == -1)
+ return NULL;
}
- x = PyDict_GetItemWithError((PyObject *)globals, key);
- if (x != NULL)
- return x;
- if (PyErr_Occurred())
+
+ /* namespace 1: globals */
+ ix = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
return NULL;
- return PyDict_GetItemWithError((PyObject *)builtins, key);
+ if (ix != DKIX_EMPTY && *value_addr != NULL)
+ return *value_addr;
+
+ /* namespace 2: builtins */
+ ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr, NULL);
+ if (ix < 0)
+ return NULL;
+ return *value_addr;
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -1247,36 +1592,33 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
}
static int
-delitem_common(PyDictObject *mp, PyDictKeyEntry *ep, PyObject **value_addr)
+delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix,
+ PyObject **value_addr)
{
PyObject *old_key, *old_value;
+ PyDictKeyEntry *ep;
old_value = *value_addr;
+ assert(old_value != NULL);
*value_addr = NULL;
mp->ma_used--;
- if (!_PyDict_HasSplitTable(mp)) {
- ENSURE_ALLOWS_DELETIONS(mp);
- old_key = ep->me_key;
- Py_INCREF(dummy);
- ep->me_key = dummy;
- Py_DECREF(old_key);
- }
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ ep = &DK_ENTRIES(mp->ma_keys)[ix];
+ dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+ ENSURE_ALLOWS_DELETIONS(mp);
+ old_key = ep->me_key;
+ ep->me_key = NULL;
+ Py_DECREF(old_key);
Py_DECREF(old_value);
+
+ assert(_PyDict_CheckConsistency(mp));
return 0;
}
int
PyDict_DelItem(PyObject *op, PyObject *key)
{
- PyDictObject *mp;
Py_hash_t hash;
- PyDictKeyEntry *ep;
- PyObject **value_addr;
-
- if (!PyDict_Check(op)) {
- PyErr_BadInternalCall();
- return -1;
- }
assert(key);
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1284,22 +1626,15 @@ PyDict_DelItem(PyObject *op, PyObject *key)
if (hash == -1)
return -1;
}
- mp = (PyDictObject *)op;
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
- return -1;
- if (*value_addr == NULL) {
- _PyErr_SetKeyError(key);
- return -1;
- }
- return delitem_common(mp, ep, value_addr);
+
+ return _PyDict_DelItem_KnownHash(op, key, hash);
}
int
_PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
{
+ Py_ssize_t hashpos, ix;
PyDictObject *mp;
- PyDictKeyEntry *ep;
PyObject **value_addr;
if (!PyDict_Check(op)) {
@@ -1309,23 +1644,37 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
assert(key);
assert(hash != -1);
mp = (PyDictObject *)op;
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ if (ix == DKIX_ERROR)
return -1;
- if (*value_addr == NULL) {
+ if (ix == DKIX_EMPTY || *value_addr == NULL) {
_PyErr_SetKeyError(key);
return -1;
}
- return delitem_common(mp, ep, value_addr);
+ assert(dk_get_index(mp->ma_keys, hashpos) == ix);
+
+ // Split table doesn't allow deletion. Combine it.
+ if (_PyDict_HasSplitTable(mp)) {
+ if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+ return -1;
+ }
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ assert(ix >= 0);
+ }
+ return delitem_common(mp, hashpos, ix, value_addr);
}
+/* This function promises that the predicate -> deletion sequence is atomic
+ * (i.e. protected by the GIL), assuming the predicate itself doesn't
+ * release the GIL.
+ */
int
_PyDict_DelItemIf(PyObject *op, PyObject *key,
int (*predicate)(PyObject *value))
{
+ Py_ssize_t hashpos, ix;
PyDictObject *mp;
Py_hash_t hash;
- PyDictKeyEntry *ep;
PyObject **value_addr;
int res;
@@ -1338,18 +1687,29 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
if (hash == -1)
return -1;
mp = (PyDictObject *)op;
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ if (ix == DKIX_ERROR)
return -1;
- if (*value_addr == NULL) {
+ if (ix == DKIX_EMPTY || *value_addr == NULL) {
_PyErr_SetKeyError(key);
return -1;
}
+ assert(dk_get_index(mp->ma_keys, hashpos) == ix);
+
+ // Split table doesn't allow deletion. Combine it.
+ if (_PyDict_HasSplitTable(mp)) {
+ if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+ return -1;
+ }
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ assert(ix >= 0);
+ }
+
res = predicate(*value_addr);
if (res == -1)
return -1;
if (res > 0)
- return delitem_common(mp, ep, value_addr);
+ return delitem_common(mp, hashpos, ix, value_addr);
else
return 0;
}
@@ -1375,9 +1735,10 @@ PyDict_Clear(PyObject *op)
mp->ma_keys = Py_EMPTY_KEYS;
mp->ma_values = empty_values;
mp->ma_used = 0;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
/* ...then clear the keys and values */
if (oldvalues != NULL) {
- n = DK_SIZE(oldkeys);
+ n = oldkeys->dk_nentries;
for (i = 0; i < n; i++)
Py_CLEAR(oldvalues[i]);
free_values(oldvalues);
@@ -1387,42 +1748,59 @@ PyDict_Clear(PyObject *op)
assert(oldkeys->dk_refcnt == 1);
DK_DECREF(oldkeys);
}
+ assert(_PyDict_CheckConsistency(mp));
}
-/* Returns -1 if no more items (or op is not a dict),
- * index of item otherwise. Stores value in pvalue
+/* Internal version of PyDict_Next that returns a hash value in addition
+ * to the key and value.
+ * Return 1 on success, return 0 when the reached the end of the dictionary
+ * (or if op is not a dictionary)
*/
-Py_LOCAL_INLINE(Py_ssize_t)
-dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
+int
+_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
+ PyObject **pvalue, Py_hash_t *phash)
{
- Py_ssize_t mask, offset;
+ Py_ssize_t i, n;
PyDictObject *mp;
- PyObject **value_ptr;
-
+ PyDictKeyEntry *entry_ptr;
+ PyObject *value;
if (!PyDict_Check(op))
- return -1;
+ return 0;
mp = (PyDictObject *)op;
- if (i < 0)
- return -1;
+ i = *ppos;
+ n = mp->ma_keys->dk_nentries;
+ if ((size_t)i >= (size_t)n)
+ return 0;
if (mp->ma_values) {
- value_ptr = &mp->ma_values[i];
- offset = sizeof(PyObject *);
+ PyObject **value_ptr = &mp->ma_values[i];
+ while (i < n && *value_ptr == NULL) {
+ value_ptr++;
+ i++;
+ }
+ if (i >= n)
+ return 0;
+ entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
+ value = *value_ptr;
}
else {
- value_ptr = &mp->ma_keys->dk_entries[i].me_value;
- offset = sizeof(PyDictKeyEntry);
- }
- mask = DK_MASK(mp->ma_keys);
- while (i <= mask && *value_ptr == NULL) {
- value_ptr = (PyObject **)(((char *)value_ptr) + offset);
- i++;
+ entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
+ while (i < n && entry_ptr->me_value == NULL) {
+ entry_ptr++;
+ i++;
+ }
+ if (i >= n)
+ return 0;
+ value = entry_ptr->me_value;
}
- if (i > mask)
- return -1;
+ *ppos = i+1;
+ if (pkey)
+ *pkey = entry_ptr->me_key;
+ if (phash)
+ *phash = entry_ptr->me_hash;
if (pvalue)
- *pvalue = *value_ptr;
- return i;
+ *pvalue = value;
+ return 1;
}
/*
@@ -1432,9 +1810,12 @@ dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
* PyObject *key, *value;
* i = 0; # important! i should not otherwise be changed by you
* while (PyDict_Next(yourdict, &i, &key, &value)) {
- * Refer to borrowed references in key and value.
+ * Refer to borrowed references in key and value.
* }
*
+ * Return 1 on success, return 0 when the reached the end of the dictionary
+ * (or if op is not a dictionary)
+ *
* CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
* mutates the dict. One exception: it is safe if the loop merely changes
* the values associated with the keys (but doesn't insert new keys or
@@ -1443,43 +1824,21 @@ dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
int
PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
{
- PyDictObject *mp;
- Py_ssize_t i = dict_next(op, *ppos, pvalue);
- if (i < 0)
- return 0;
- mp = (PyDictObject *)op;
- *ppos = i+1;
- if (pkey)
- *pkey = mp->ma_keys->dk_entries[i].me_key;
- return 1;
-}
-
-/* Internal version of PyDict_Next that returns a hash value in addition
- * to the key and value.
- */
-int
-_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
- PyObject **pvalue, Py_hash_t *phash)
-{
- PyDictObject *mp;
- Py_ssize_t i = dict_next(op, *ppos, pvalue);
- if (i < 0)
- return 0;
- mp = (PyDictObject *)op;
- *ppos = i+1;
- *phash = mp->ma_keys->dk_entries[i].me_hash;
- if (pkey)
- *pkey = mp->ma_keys->dk_entries[i].me_key;
- return 1;
+ return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
}
/* Internal version of dict.pop(). */
PyObject *
-_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *deflt)
+_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
{
+ Py_ssize_t ix, hashpos;
PyObject *old_value, *old_key;
PyDictKeyEntry *ep;
PyObject **value_addr;
+ PyDictObject *mp;
+
+ assert(PyDict_Check(dict));
+ mp = (PyDictObject *)dict;
if (mp->ma_used == 0) {
if (deflt) {
@@ -1489,11 +1848,10 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject
_PyErr_SetKeyError(key);
return NULL;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ if (ix == DKIX_ERROR)
return NULL;
- old_value = *value_addr;
- if (old_value == NULL) {
+ if (ix == DKIX_EMPTY || *value_addr == NULL) {
if (deflt) {
Py_INCREF(deflt);
return deflt;
@@ -1501,24 +1859,38 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject
_PyErr_SetKeyError(key);
return NULL;
}
+
+ // Split table doesn't allow deletion. Combine it.
+ if (_PyDict_HasSplitTable(mp)) {
+ if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+ return NULL;
+ }
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ assert(ix >= 0);
+ }
+
+ old_value = *value_addr;
+ assert(old_value != NULL);
*value_addr = NULL;
mp->ma_used--;
- if (!_PyDict_HasSplitTable(mp)) {
- ENSURE_ALLOWS_DELETIONS(mp);
- old_key = ep->me_key;
- Py_INCREF(dummy);
- ep->me_key = dummy;
- Py_DECREF(old_key);
- }
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+ ep = &DK_ENTRIES(mp->ma_keys)[ix];
+ ENSURE_ALLOWS_DELETIONS(mp);
+ old_key = ep->me_key;
+ ep->me_key = NULL;
+ Py_DECREF(old_key);
+
+ assert(_PyDict_CheckConsistency(mp));
return old_value;
}
PyObject *
-_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
+_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
{
Py_hash_t hash;
- if (mp->ma_used == 0) {
+ if (((PyDictObject *)dict)->ma_used == 0) {
if (deflt) {
Py_INCREF(deflt);
return deflt;
@@ -1532,7 +1904,7 @@ _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
if (hash == -1)
return NULL;
}
- return _PyDict_Pop_KnownHash(mp, key, hash, deflt);
+ return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
}
/* Internal version of dict.from_keys(). It is subclass-friendly. */
@@ -1556,7 +1928,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;
- if (dictresize(mp, Py_SIZE(iterable))) {
+ if (dictresize(mp, ESTIMATE_SIZE(Py_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
@@ -1575,7 +1947,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key;
Py_hash_t hash;
- if (dictresize(mp, PySet_GET_SIZE(iterable))) {
+ if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d);
return NULL;
}
@@ -1635,7 +2007,7 @@ dict_dealloc(PyDictObject *mp)
Py_TRASHCAN_SAFE_BEGIN(mp)
if (values != NULL) {
if (values != empty_values) {
- for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+ for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
Py_XDECREF(values[i]);
}
free_values(values);
@@ -1747,8 +2119,8 @@ static PyObject *
dict_subscript(PyDictObject *mp, PyObject *key)
{
PyObject *v;
+ Py_ssize_t ix;
Py_hash_t hash;
- PyDictKeyEntry *ep;
PyObject **value_addr;
if (!PyUnicode_CheckExact(key) ||
@@ -1757,11 +2129,10 @@ dict_subscript(PyDictObject *mp, PyObject *key)
if (hash == -1)
return NULL;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
return NULL;
- v = *value_addr;
- if (v == NULL) {
+ if (ix == DKIX_EMPTY || *value_addr == NULL) {
if (!PyDict_CheckExact(mp)) {
/* Look up __missing__ method if we're a subclass. */
PyObject *missing, *res;
@@ -1779,8 +2150,8 @@ dict_subscript(PyDictObject *mp, PyObject *key)
_PyErr_SetKeyError(key);
return NULL;
}
- else
- Py_INCREF(v);
+ v = *value_addr;
+ Py_INCREF(v);
return v;
}
@@ -1820,8 +2191,8 @@ dict_keys(PyDictObject *mp)
Py_DECREF(v);
goto again;
}
- ep = &mp->ma_keys->dk_entries[0];
- size = DK_SIZE(mp->ma_keys);
+ ep = DK_ENTRIES(mp->ma_keys);
+ size = mp->ma_keys->dk_nentries;
if (mp->ma_values) {
value_ptr = mp->ma_values;
offset = sizeof(PyObject *);
@@ -1848,6 +2219,7 @@ dict_values(PyDictObject *mp)
{
PyObject *v;
Py_ssize_t i, j;
+ PyDictKeyEntry *ep;
Py_ssize_t size, n, offset;
PyObject **value_ptr;
@@ -1863,13 +2235,14 @@ dict_values(PyDictObject *mp)
Py_DECREF(v);
goto again;
}
- size = DK_SIZE(mp->ma_keys);
+ ep = DK_ENTRIES(mp->ma_keys);
+ size = mp->ma_keys->dk_nentries;
if (mp->ma_values) {
value_ptr = mp->ma_values;
offset = sizeof(PyObject *);
}
else {
- value_ptr = &mp->ma_keys->dk_entries[0].me_value;
+ value_ptr = &ep[0].me_value;
offset = sizeof(PyDictKeyEntry);
}
for (i = 0, j = 0; i < size; i++) {
@@ -1920,8 +2293,8 @@ dict_items(PyDictObject *mp)
goto again;
}
/* Nothing we do below makes any function calls. */
- ep = mp->ma_keys->dk_entries;
- size = DK_SIZE(mp->ma_keys);
+ ep = DK_ENTRIES(mp->ma_keys);
+ size = mp->ma_keys->dk_nentries;
if (mp->ma_values) {
value_ptr = mp->ma_values;
offset = sizeof(PyObject *);
@@ -1965,7 +2338,8 @@ dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
}
static int
-dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
+dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
+ const char *methname)
{
PyObject *arg = NULL;
int result = 0;
@@ -2067,6 +2441,7 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
}
i = 0;
+ assert(_PyDict_CheckConsistency((PyDictObject *)d));
goto Return;
Fail:
Py_XDECREF(item);
@@ -2077,18 +2452,14 @@ Return:
return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
}
-int
-PyDict_Update(PyObject *a, PyObject *b)
-{
- return PyDict_Merge(a, b, 1);
-}
-
-int
-PyDict_Merge(PyObject *a, PyObject *b, int override)
+static int
+dict_merge(PyObject *a, PyObject *b, int override)
{
PyDictObject *mp, *other;
Py_ssize_t i, n;
- PyDictKeyEntry *entry;
+ PyDictKeyEntry *entry, *ep0;
+
+ assert(0 <= override && override <= 2);
/* We accept for the argument either a concrete dictionary object,
* or an abstract "mapping" object. For the former, we can do
@@ -2115,13 +2486,16 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
* incrementally resizing as we insert new items. Expect
* that there will be no (or few) overlapping keys.
*/
- if (mp->ma_keys->dk_usable * 3 < other->ma_used * 2)
- if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
+ if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
+ if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
return -1;
- for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) {
+ }
+ }
+ ep0 = DK_ENTRIES(other->ma_keys);
+ for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
PyObject *key, *value;
Py_hash_t hash;
- entry = &other->ma_keys->dk_entries[i];
+ entry = &ep0[i];
key = entry->me_key;
hash = entry->me_hash;
if (other->ma_values)
@@ -2133,14 +2507,28 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
int err = 0;
Py_INCREF(key);
Py_INCREF(value);
- if (override || PyDict_GetItem(a, key) == NULL)
+ if (override == 1)
err = insertdict(mp, key, hash, value);
+ else if (_PyDict_GetItem_KnownHash(a, key, hash) == NULL) {
+ if (PyErr_Occurred()) {
+ Py_DECREF(value);
+ Py_DECREF(key);
+ return -1;
+ }
+ err = insertdict(mp, key, hash, value);
+ }
+ else if (override != 0) {
+ _PyErr_SetKeyError(key);
+ Py_DECREF(value);
+ Py_DECREF(key);
+ return -1;
+ }
Py_DECREF(value);
Py_DECREF(key);
if (err != 0)
return -1;
- if (n != DK_SIZE(other->ma_keys)) {
+ if (n != other->ma_keys->dk_nentries) {
PyErr_SetString(PyExc_RuntimeError,
"dict mutated during update");
return -1;
@@ -2169,7 +2557,13 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
return -1;
for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
- if (!override && PyDict_GetItem(a, key) != NULL) {
+ if (override != 1 && PyDict_GetItem(a, key) != NULL) {
+ if (override != 0) {
+ _PyErr_SetKeyError(key);
+ Py_DECREF(key);
+ Py_DECREF(iter);
+ return -1;
+ }
Py_DECREF(key);
continue;
}
@@ -2192,9 +2586,29 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
/* Iterator completed, via error */
return -1;
}
+ assert(_PyDict_CheckConsistency((PyDictObject *)a));
return 0;
}
+int
+PyDict_Update(PyObject *a, PyObject *b)
+{
+ return dict_merge(a, b, 1);
+}
+
+int
+PyDict_Merge(PyObject *a, PyObject *b, int override)
+{
+ /* XXX Deprecate override not in (0, 1). */
+ return dict_merge(a, b, override != 0);
+}
+
+int
+_PyDict_MergeEx(PyObject *a, PyObject *b, int override)
+{
+ return dict_merge(a, b, override);
+}
+
static PyObject *
dict_copy(PyDictObject *mp)
{
@@ -2215,7 +2629,9 @@ PyDict_Copy(PyObject *o)
mp = (PyDictObject *)o;
if (_PyDict_HasSplitTable(mp)) {
PyDictObject *split_copy;
- PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys));
+ Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
+ PyObject **newvalues;
+ newvalues = new_values(size);
if (newvalues == NULL)
return PyErr_NoMemory();
split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
@@ -2227,7 +2643,7 @@ PyDict_Copy(PyObject *o)
split_copy->ma_keys = mp->ma_keys;
split_copy->ma_used = mp->ma_used;
DK_INCREF(mp->ma_keys);
- for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+ for (i = 0, n = size; i < n; i++) {
PyObject *value = mp->ma_values[i];
Py_XINCREF(value);
split_copy->ma_values[i] = value;
@@ -2298,8 +2714,8 @@ dict_equal(PyDictObject *a, PyDictObject *b)
/* can't be equal if # of entries differ */
return 0;
/* Same # of entries -- check all of 'em. Exit early on any diff. */
- for (i = 0; i < DK_SIZE(a->ma_keys); i++) {
- PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i];
+ for (i = 0; i < a->ma_keys->dk_nentries; i++) {
+ PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
PyObject *aval;
if (a->ma_values)
aval = a->ma_values[i];
@@ -2316,7 +2732,7 @@ dict_equal(PyDictObject *a, PyDictObject *b)
/* ditto for key */
Py_INCREF(key);
/* reuse the known hash value */
- if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL)
+ if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr, NULL) < 0)
bval = NULL;
else
bval = *vaddr;
@@ -2374,7 +2790,7 @@ dict___contains__(PyDictObject *self, PyObject *key)
{
register PyDictObject *mp = self;
Py_hash_t hash;
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
PyObject **value_addr;
if (!PyUnicode_CheckExact(key) ||
@@ -2383,10 +2799,12 @@ dict___contains__(PyDictObject *self, PyObject *key)
if (hash == -1)
return NULL;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
return NULL;
- return PyBool_FromLong(*value_addr != NULL);
+ if (ix == DKIX_EMPTY || *value_addr == NULL)
+ Py_RETURN_FALSE;
+ Py_RETURN_TRUE;
}
static PyObject *
@@ -2396,7 +2814,7 @@ dict_get(PyDictObject *mp, PyObject *args)
PyObject *failobj = Py_None;
PyObject *val = NULL;
Py_hash_t hash;
- PyDictKeyEntry *ep;
+ Py_ssize_t ix;
PyObject **value_addr;
if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
@@ -2408,12 +2826,13 @@ dict_get(PyDictObject *mp, PyObject *args)
if (hash == -1)
return NULL;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
return NULL;
- val = *value_addr;
- if (val == NULL)
+ if (ix == DKIX_EMPTY || *value_addr == NULL)
val = failobj;
+ else
+ val = *value_addr;
Py_INCREF(val);
return val;
}
@@ -2422,43 +2841,88 @@ PyObject *
PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
{
PyDictObject *mp = (PyDictObject *)d;
- PyObject *val = NULL;
+ PyObject *value;
Py_hash_t hash;
- PyDictKeyEntry *ep;
+ Py_ssize_t hashpos, ix;
PyObject **value_addr;
if (!PyDict_Check(d)) {
PyErr_BadInternalCall();
return NULL;
}
+
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1) {
hash = PyObject_Hash(key);
if (hash == -1)
return NULL;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- if (ep == NULL)
+
+ if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
+ if (insertion_resize(mp) < 0)
+ return NULL;
+ }
+
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+ if (ix == DKIX_ERROR)
return NULL;
- val = *value_addr;
- if (val == NULL) {
+
+ if (_PyDict_HasSplitTable(mp) &&
+ ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+ (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+ if (insertion_resize(mp) < 0) {
+ return NULL;
+ }
+ find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+ ix = DKIX_EMPTY;
+ }
+
+ if (ix == DKIX_EMPTY) {
+ PyDictKeyEntry *ep, *ep0;
+ value = defaultobj;
if (mp->ma_keys->dk_usable <= 0) {
- /* Need to resize. */
- if (insertion_resize(mp) < 0)
+ if (insertion_resize(mp) < 0) {
return NULL;
- ep = find_empty_slot(mp, key, hash, &value_addr);
+ }
+ find_empty_slot(mp, key, hash, &value_addr, &hashpos);
}
- Py_INCREF(defaultobj);
+ ep0 = DK_ENTRIES(mp->ma_keys);
+ ep = &ep0[mp->ma_keys->dk_nentries];
+ dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
Py_INCREF(key);
- MAINTAIN_TRACKING(mp, key, defaultobj);
+ Py_INCREF(value);
+ MAINTAIN_TRACKING(mp, key, value);
ep->me_key = key;
ep->me_hash = hash;
- *value_addr = defaultobj;
- val = defaultobj;
+ if (mp->ma_values) {
+ assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+ mp->ma_values[mp->ma_keys->dk_nentries] = value;
+ }
+ else {
+ ep->me_value = value;
+ }
+ mp->ma_used++;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
mp->ma_keys->dk_usable--;
+ mp->ma_keys->dk_nentries++;
+ assert(mp->ma_keys->dk_usable >= 0);
+ }
+ else if (*value_addr == NULL) {
+ value = defaultobj;
+ assert(_PyDict_HasSplitTable(mp));
+ assert(ix == mp->ma_used);
+ Py_INCREF(value);
+ MAINTAIN_TRACKING(mp, key, value);
+ *value_addr = value;
mp->ma_used++;
+ mp->ma_version_tag = DICT_NEXT_VERSION();
}
- return val;
+ else {
+ value = *value_addr;
+ }
+
+ assert(_PyDict_CheckConsistency(mp));
+ return value;
}
static PyObject *
@@ -2490,17 +2954,16 @@ dict_pop(PyDictObject *mp, PyObject *args)
if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
return NULL;
- return _PyDict_Pop(mp, key, deflt);
+ return _PyDict_Pop((PyObject*)mp, key, deflt);
}
static PyObject *
dict_popitem(PyDictObject *mp)
{
- Py_hash_t i = 0;
- PyDictKeyEntry *ep;
+ Py_ssize_t i, j;
+ PyDictKeyEntry *ep0, *ep;
PyObject *res;
-
/* Allocate the result tuple before checking the size. Believe it
* or not, this allocation could trigger a garbage collection which
* could empty the dict, so if we checked the size first and that
@@ -2521,68 +2984,64 @@ dict_popitem(PyDictObject *mp)
}
/* Convert split table to combined table */
if (mp->ma_keys->dk_lookup == lookdict_split) {
- /* -1 is required since dictresize() uses key size > minused */
- if (dictresize(mp, DK_SIZE(mp->ma_keys) - 1)) {
+ if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
Py_DECREF(res);
return NULL;
}
}
ENSURE_ALLOWS_DELETIONS(mp);
- /* Set ep to "the first" dict entry with a value. We abuse the hash
- * field of slot 0 to hold a search finger:
- * If slot 0 has a value, use slot 0.
- * Else slot 0 is being used to hold a search finger,
- * and we use its hash value as the first index to look.
- */
- ep = &mp->ma_keys->dk_entries[0];
- if (ep->me_value == NULL) {
- Py_ssize_t mask = DK_MASK(mp->ma_keys);
- i = ep->me_hash;
- /* The hash field may be a real hash value, or it may be a
- * legit search finger, or it may be a once-legit search
- * finger that's out of bounds now because it wrapped around
- * or the table shrunk -- simply make sure it's in bounds now.
- */
- if (i > mask || i < 1)
- i = 1; /* skip slot 0 */
- while ((ep = &mp->ma_keys->dk_entries[i])->me_value == NULL) {
- i++;
- if (i > mask)
- i = 1;
- }
+
+ /* Pop last item */
+ ep0 = DK_ENTRIES(mp->ma_keys);
+ i = mp->ma_keys->dk_nentries - 1;
+ while (i >= 0 && ep0[i].me_value == NULL) {
+ i--;
}
+ assert(i >= 0);
+
+ ep = &ep0[i];
+ j = lookdict_index(mp->ma_keys, ep->me_hash, i);
+ assert(j >= 0);
+ assert(dk_get_index(mp->ma_keys, j) == i);
+ dk_set_index(mp->ma_keys, j, DKIX_DUMMY);
+
PyTuple_SET_ITEM(res, 0, ep->me_key);
PyTuple_SET_ITEM(res, 1, ep->me_value);
- Py_INCREF(dummy);
- ep->me_key = dummy;
+ ep->me_key = NULL;
ep->me_value = NULL;
+ /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
+ mp->ma_keys->dk_nentries = i;
mp->ma_used--;
- assert(mp->ma_keys->dk_entries[0].me_value == NULL);
- mp->ma_keys->dk_entries[0].me_hash = i + 1; /* next place to start */
+ mp->ma_version_tag = DICT_NEXT_VERSION();
+ assert(_PyDict_CheckConsistency(mp));
return res;
}
static int
dict_traverse(PyObject *op, visitproc visit, void *arg)
{
- Py_ssize_t i, n;
PyDictObject *mp = (PyDictObject *)op;
- if (mp->ma_keys->dk_lookup == lookdict) {
- for (i = 0; i < DK_SIZE(mp->ma_keys); i++) {
- if (mp->ma_keys->dk_entries[i].me_value != NULL) {
- Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
- Py_VISIT(mp->ma_keys->dk_entries[i].me_key);
+ PyDictKeysObject *keys = mp->ma_keys;
+ PyDictKeyEntry *entries = DK_ENTRIES(keys);
+ Py_ssize_t i, n = keys->dk_nentries;
+
+ if (keys->dk_lookup == lookdict) {
+ for (i = 0; i < n; i++) {
+ if (entries[i].me_value != NULL) {
+ Py_VISIT(entries[i].me_value);
+ Py_VISIT(entries[i].me_key);
}
}
- } else {
+ }
+ else {
if (mp->ma_values != NULL) {
- for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+ for (i = 0; i < n; i++) {
Py_VISIT(mp->ma_values[i]);
}
}
else {
- for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
- Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
+ for (i = 0; i < n; i++) {
+ Py_VISIT(entries[i].me_value);
}
}
}
@@ -2601,23 +3060,31 @@ static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
Py_ssize_t
_PyDict_SizeOf(PyDictObject *mp)
{
- Py_ssize_t size, res;
+ Py_ssize_t size, usable, res;
size = DK_SIZE(mp->ma_keys);
+ usable = USABLE_FRACTION(size);
+
res = _PyObject_SIZE(Py_TYPE(mp));
if (mp->ma_values)
- res += size * sizeof(PyObject*);
+ res += usable * sizeof(PyObject*);
/* If the dictionary is split, the keys portion is accounted-for
in the type object. */
if (mp->ma_keys->dk_refcnt == 1)
- res += sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry);
+ res += (sizeof(PyDictKeysObject)
+ - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ + DK_IXSIZE(mp->ma_keys) * size
+ + sizeof(PyDictKeyEntry) * usable);
return res;
}
Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject *keys)
{
- return sizeof(PyDictKeysObject) + (DK_SIZE(keys)-1) * sizeof(PyDictKeyEntry);
+ return (sizeof(PyDictKeysObject)
+ - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ + DK_IXSIZE(keys) * DK_SIZE(keys)
+ + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
}
static PyObject *
@@ -2704,8 +3171,8 @@ int
PyDict_Contains(PyObject *op, PyObject *key)
{
Py_hash_t hash;
+ Py_ssize_t ix;
PyDictObject *mp = (PyDictObject *)op;
- PyDictKeyEntry *ep;
PyObject **value_addr;
if (!PyUnicode_CheckExact(key) ||
@@ -2714,8 +3181,10 @@ PyDict_Contains(PyObject *op, PyObject *key)
if (hash == -1)
return -1;
}
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- return (ep == NULL) ? -1 : (*value_addr != NULL);
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
+ return -1;
+ return (ix != DKIX_EMPTY && *value_addr != NULL);
}
/* Internal version of PyDict_Contains used when the hash value is already known */
@@ -2723,11 +3192,13 @@ int
_PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
{
PyDictObject *mp = (PyDictObject *)op;
- PyDictKeyEntry *ep;
PyObject **value_addr;
+ Py_ssize_t ix;
- ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
- return (ep == NULL) ? -1 : (*value_addr != NULL);
+ ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+ if (ix == DKIX_ERROR)
+ return -1;
+ return (ix != DKIX_EMPTY && *value_addr != NULL);
}
/* Hack to implement "key in dict" */
@@ -2761,11 +3232,13 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
_PyObject_GC_UNTRACK(d);
d->ma_used = 0;
- d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+ d->ma_version_tag = DICT_NEXT_VERSION();
+ d->ma_keys = new_keys_object(PyDict_MINSIZE);
if (d->ma_keys == NULL) {
Py_DECREF(self);
return NULL;
}
+ assert(_PyDict_CheckConsistency(d));
return self;
}
@@ -2986,13 +3459,13 @@ static PyMethodDef dictiter_methods[] = {
{NULL, NULL} /* sentinel */
};
-static PyObject *dictiter_iternextkey(dictiterobject *di)
+static PyObject*
+dictiter_iternextkey(dictiterobject *di)
{
PyObject *key;
- Py_ssize_t i, mask, offset;
+ Py_ssize_t i, n;
PyDictKeysObject *k;
PyDictObject *d = di->di_dict;
- PyObject **value_ptr;
if (d == NULL)
return NULL;
@@ -3006,27 +3479,30 @@ static PyObject *dictiter_iternextkey(dictiterobject *di)
}
i = di->di_pos;
- if (i < 0)
- goto fail;
k = d->ma_keys;
+ n = k->dk_nentries;
if (d->ma_values) {
- value_ptr = &d->ma_values[i];
- offset = sizeof(PyObject *);
+ PyObject **value_ptr = &d->ma_values[i];
+ while (i < n && *value_ptr == NULL) {
+ value_ptr++;
+ i++;
+ }
+ if (i >= n)
+ goto fail;
+ key = DK_ENTRIES(k)[i].me_key;
}
else {
- value_ptr = &k->dk_entries[i].me_value;
- offset = sizeof(PyDictKeyEntry);
- }
- mask = DK_SIZE(k)-1;
- while (i <= mask && *value_ptr == NULL) {
- value_ptr = (PyObject **)(((char *)value_ptr) + offset);
- i++;
+ PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
+ while (i < n && entry_ptr->me_value == NULL) {
+ entry_ptr++;
+ i++;
+ }
+ if (i >= n)
+ goto fail;
+ key = entry_ptr->me_key;
}
di->di_pos = i+1;
- if (i > mask)
- goto fail;
di->len--;
- key = k->dk_entries[i].me_key;
Py_INCREF(key);
return key;
@@ -3069,12 +3545,12 @@ PyTypeObject PyDictIterKey_Type = {
0,
};
-static PyObject *dictiter_iternextvalue(dictiterobject *di)
+static PyObject *
+dictiter_iternextvalue(dictiterobject *di)
{
PyObject *value;
- Py_ssize_t i, mask, offset;
+ Py_ssize_t i, n;
PyDictObject *d = di->di_dict;
- PyObject **value_ptr;
if (d == NULL)
return NULL;
@@ -3088,26 +3564,29 @@ static PyObject *dictiter_iternextvalue(dictiterobject *di)
}
i = di->di_pos;
- mask = DK_SIZE(d->ma_keys)-1;
- if (i < 0 || i > mask)
- goto fail;
+ n = d->ma_keys->dk_nentries;
if (d->ma_values) {
- value_ptr = &d->ma_values[i];
- offset = sizeof(PyObject *);
+ PyObject **value_ptr = &d->ma_values[i];
+ while (i < n && *value_ptr == NULL) {
+ value_ptr++;
+ i++;
+ }
+ if (i >= n)
+ goto fail;
+ value = *value_ptr;
}
else {
- value_ptr = &d->ma_keys->dk_entries[i].me_value;
- offset = sizeof(PyDictKeyEntry);
- }
- while (i <= mask && *value_ptr == NULL) {
- value_ptr = (PyObject **)(((char *)value_ptr) + offset);
- i++;
- if (i > mask)
+ PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
+ while (i < n && entry_ptr->me_value == NULL) {
+ entry_ptr++;
+ i++;
+ }
+ if (i >= n)
goto fail;
+ value = entry_ptr->me_value;
}
di->di_pos = i+1;
di->len--;
- value = *value_ptr;
Py_INCREF(value);
return value;
@@ -3138,7 +3617,7 @@ PyTypeObject PyDictIterValue_Type = {
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
0, /* tp_doc */
(traverseproc)dictiter_traverse, /* tp_traverse */
0, /* tp_clear */
@@ -3150,12 +3629,12 @@ PyTypeObject PyDictIterValue_Type = {
0,
};
-static PyObject *dictiter_iternextitem(dictiterobject *di)
+static PyObject *
+dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
- Py_ssize_t i, mask, offset;
+ Py_ssize_t i, n;
PyDictObject *d = di->di_dict;
- PyObject **value_ptr;
if (d == NULL)
return NULL;
@@ -3169,37 +3648,41 @@ static PyObject *dictiter_iternextitem(dictiterobject *di)
}
i = di->di_pos;
- if (i < 0)
- goto fail;
- mask = DK_SIZE(d->ma_keys)-1;
+ n = d->ma_keys->dk_nentries;
if (d->ma_values) {
- value_ptr = &d->ma_values[i];
- offset = sizeof(PyObject *);
+ PyObject **value_ptr = &d->ma_values[i];
+ while (i < n && *value_ptr == NULL) {
+ value_ptr++;
+ i++;
+ }
+ if (i >= n)
+ goto fail;
+ key = DK_ENTRIES(d->ma_keys)[i].me_key;
+ value = *value_ptr;
}
else {
- value_ptr = &d->ma_keys->dk_entries[i].me_value;
- offset = sizeof(PyDictKeyEntry);
- }
- while (i <= mask && *value_ptr == NULL) {
- value_ptr = (PyObject **)(((char *)value_ptr) + offset);
- i++;
+ PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
+ while (i < n && entry_ptr->me_value == NULL) {
+ entry_ptr++;
+ i++;
+ }
+ if (i >= n)
+ goto fail;
+ key = entry_ptr->me_key;
+ value = entry_ptr->me_value;
}
di->di_pos = i+1;
- if (i > mask)
- goto fail;
-
+ di->len--;
if (result->ob_refcnt == 1) {
Py_INCREF(result);
Py_DECREF(PyTuple_GET_ITEM(result, 0));
Py_DECREF(PyTuple_GET_ITEM(result, 1));
- } else {
+ }
+ else {
result = PyTuple_New(2);
if (result == NULL)
return NULL;
}
- di->len--;
- key = d->ma_keys->dk_entries[i].me_key;
- value = *value_ptr;
Py_INCREF(key);
Py_INCREF(value);
PyTuple_SET_ITEM(result, 0, key); /* steals reference */
@@ -3838,7 +4321,7 @@ dictvalues_new(PyObject *dict)
PyDictKeysObject *
_PyDict_NewKeysForClass(void)
{
- PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_SPLIT);
+ PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
if (keys == NULL)
PyErr_Clear();
else
@@ -3874,7 +4357,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context)
int
_PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
- PyObject *key, PyObject *value)
+ PyObject *key, PyObject *value)
{
PyObject *dict;
int res;
@@ -3901,15 +4384,25 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
else {
int was_shared = cached == ((PyDictObject *)dict)->ma_keys;
res = PyDict_SetItem(dict, key, value);
- /* PyDict_SetItem() may call dictresize() and convert split table
- * into combined table. In such case, convert it to split
- * table again and update type's shared key only when this is
- * the only dict sharing key with the type.
- */
if (was_shared && cached != ((PyDictObject *)dict)->ma_keys) {
+ /* PyDict_SetItem() may call dictresize and convert split table
+ * into combined table. In such case, convert it to split
+ * table again and update type's shared key only when this is
+ * the only dict sharing key with the type.
+ *
+ * This is to allow using shared key in class like this:
+ *
+ * class C:
+ * def __init__(self):
+ * # one dict resize happens
+ * self.a, self.b, self.c = 1, 2, 3
+ * self.d, self.e, self.f = 4, 5, 6
+ * a = C()
+ */
if (cached->dk_refcnt == 1) {
CACHED_KEYS(tp) = make_keys_shared(dict);
- } else {
+ }
+ else {
CACHED_KEYS(tp) = NULL;
}
DK_DECREF(cached);
@@ -3939,50 +4432,3 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
{
DK_DECREF(keys);
}
-
-
-/* ARGSUSED */
-static PyObject *
-dummy_repr(PyObject *op)
-{
- return PyUnicode_FromString("<dummy key>");
-}
-
-/* ARGUSED */
-static void
-dummy_dealloc(PyObject* ignore)
-{
- /* This should never get called, but we also don't want to SEGV if
- * we accidentally decref dummy-key out of existence.
- */
- Py_FatalError("deallocating <dummy key>");
-}
-
-static PyTypeObject PyDictDummy_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "<dummy key> type",
- 0,
- 0,
- dummy_dealloc, /*tp_dealloc*/ /*never called*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_reserved*/
- dummy_repr, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- 0, /*tp_hash */
- 0, /*tp_call */
- 0, /*tp_str */
- 0, /*tp_getattro */
- 0, /*tp_setattro */
- 0, /*tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /*tp_flags */
-};
-
-static PyObject _dummy_struct = {
- _PyObject_EXTRA_INIT
- 2, &PyDictDummy_Type
-};
-