1 files changed, 1157 insertions, 711 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 11c086ffb4..a7b403bcec 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1,4 +1,3 @@
-
 /* Dictionary object implementation using a hash table */
 
 /* The distribution includes a separate file, Objects/dictnotes.txt,
@@ -7,68 +6,112 @@
    tuning dictionaries, and several ideas for possible optimizations.
 */
 
+/* PyDictKeysObject
 
-/*
-There are four kinds of slots in the table:
+This implements the dictionary's hashtable.
 
-1. Unused.  me_key == me_value == NULL
-   Does not hold an active (key, value) pair now and never did.  Unused can
-   transition to Active upon key insertion.  This is the only case in which
-   me_key is NULL, and is each slot's initial state.
+As of Python 3.6, this is compact and ordered. Basic idea is described here.
+https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
 
-2. Active.  me_key != NULL and me_key != dummy and me_value != NULL
-   Holds an active (key, value) pair.  Active can transition to Dummy or
-   Pending upon key deletion (for combined and split tables respectively).
-   This is the only case in which me_value != NULL.
+layout:
 
-3. Dummy.  me_key == dummy and me_value == NULL
-   Previously held an active (key, value) pair, but that was deleted and an
-   active pair has not yet overwritten the slot.  Dummy can transition to
-   Active upon key insertion.  Dummy slots cannot be made Unused again
-   (cannot have me_key set to NULL), else the probe sequence in case of
-   collision would have no way to know they were once active.
++---------------+
+| dk_refcnt     |
+| dk_size       |
+| dk_lookup     |
+| dk_usable     |
+| dk_nentries   |
++---------------+
+| dk_indices    |
+|               |
++---------------+
+| dk_entries    |
+|               |
++---------------+
 
-4. Pending. Not yet inserted or deleted from a split-table.
-   key != NULL, key != dummy and value == NULL
+dk_indices is actual hashtable.  It holds index in entries, or DKIX_EMPTY(-1)
+or DKIX_DUMMY(-2).
+Size of indices is dk_size.  Type of each index in indices is vary on dk_size:
 
+* int8  for          dk_size <= 128
+* int16 for 256   <= dk_size <= 2**15
+* int32 for 2**16 <= dk_size <= 2**31
+* int64 for 2**32 <= dk_size
+
+dk_entries is array of PyDictKeyEntry.  It's size is USABLE_FRACTION(dk_size).
+DK_ENTRIES(dk) can be used to get pointer to entries.
+
+NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
+dk_indices entry is signed integer and int16 is used for table which
+dk_size == 256.
+*/
+
+
+/*
 The DictObject can be in one of two forms.
+
 Either:
   A combined table:
     ma_values == NULL, dk_refcnt == 1.
     Values are stored in the me_value field of the PyDictKeysObject.
-    Slot kind 4 is not allowed i.e.
-        key != NULL, key != dummy and value == NULL is illegal.
 Or:
   A split table:
     ma_values != NULL, dk_refcnt >= 1
     Values are stored in the ma_values array.
-    Only string (unicode) keys are allowed, no <dummy> keys are present.
+    Only string (unicode) keys are allowed.
+    All dicts sharing same key must have same insertion order.
+
+There are four kinds of slots in the table (slot is index, and
+DK_ENTRIES(keys)[index] if index >= 0):
+
+1. Unused.  index == DKIX_EMPTY
+   Does not hold an active (key, value) pair now and never did.  Unused can
+   transition to Active upon key insertion.  This is each slot's initial state.
+
+2. Active.  index >= 0, me_key != NULL and me_value != NULL
+   Holds an active (key, value) pair.  Active can transition to Dummy or
+   Pending upon key deletion (for combined and split tables respectively).
+   This is the only case in which me_value != NULL.
+
+3. Dummy.  index == DKIX_DUMMY  (combined only)
+   Previously held an active (key, value) pair, but that was deleted and an
+   active pair has not yet overwritten the slot.  Dummy can transition to
+   Active upon key insertion.  Dummy slots cannot be made Unused again
+   else the probe sequence in case of collision would have no way to know
+   they were once active.
 
-Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
-hold a search finger.  The me_hash field of Unused or Dummy slots has no
-meaning otherwise. As a consequence of this popitem always converts the dict
-to the combined-table form.
+4. Pending. index >= 0, key != NULL, and value == NULL  (split only)
+   Not yet inserted in split-table.
 */
 
-/* PyDict_MINSIZE_SPLIT is the minimum size of a split dictionary.
- * It must be a power of 2, and at least 4.
- * Resizing of split dictionaries is very rare, so the saving memory is more
- * important than the cost of resizing.
- */
-#define PyDict_MINSIZE_SPLIT 4
+/*
+Preserving insertion order
+
+It's simple for combined table.  Since dk_entries is mostly append only, we can
+get insertion order by just iterating dk_entries.
 
-/* PyDict_MINSIZE_COMBINED is the starting size for any new, non-split dict.
+One exception is .popitem().  It removes last item in dk_entries and decrement
+dk_nentries to achieve amortized O(1).  Since there are DKIX_DUMMY remains in
+dk_indices, we can't increment dk_usable even though dk_nentries is
+decremented.
+
+In split table, inserting into pending entry is allowed only for dk_entries[ix]
+where ix == mp->ma_used. Inserting into other index and deleting item cause
+converting the dict to the combined table.
+*/
+
+/* PyDict_MINSIZE is the starting size for any new dict.
  * 8 allows dicts with no more than 5 active entries; experiments suggested
  * this suffices for the majority of dicts (consisting mostly of usually-small
  * dicts created to pass keyword arguments).
  * Making this 8, rather than 4 reduces the number of resizes for most
  * dictionaries, without any significant extra memory use.
  */
-#define PyDict_MINSIZE_COMBINED 8
+#define PyDict_MINSIZE 8
 
 #include "Python.h"
 #include "dict-common.h"
-#include "stringlib/eq.h"
+#include "stringlib/eq.h"    /* to get unicode_eq() */
 
 /*[clinic input]
 class dict "PyDictObject *" "&PyDict_Type"
@@ -141,8 +184,8 @@ The other half of the strategy is to get the other bits of the hash code
 into play.  This is done by initializing a (unsigned) vrbl "perturb" to the
 full hash code, and changing the recurrence to:
 
-    j = (5*j) + 1 + perturb;
     perturb >>= PERTURB_SHIFT;
+    j = (5*j) + 1 + perturb;
     use j % 2**i as the next table index;
 
 Now the probe sequence depends (eventually) on every bit in the hash code,
@@ -177,41 +220,38 @@ equally good collision statistics, needed less code & used less memory.
 
 */
 
-/* Object used as dummy key to fill deleted entries
- * This could be any unique object,
- * use a custom type in order to minimise coupling.
-*/
-static PyObject _dummy_struct;
-
-#define dummy (&_dummy_struct)
-
-#ifdef Py_REF_DEBUG
-PyObject *
-_PyDict_Dummy(void)
-{
-    return dummy;
-}
-#endif
-
 /* forward declarations */
-static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,
-                                Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,
-                                        Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *
+static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
+                           Py_hash_t hash, PyObject ***value_addr,
+                           Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
+                                   Py_hash_t hash, PyObject ***value_addr,
+                                   Py_ssize_t *hashpos);
+static Py_ssize_t
 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
-                         Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,
-                                      Py_hash_t hash, PyObject ***value_addr);
+                         Py_hash_t hash, PyObject ***value_addr,
+                         Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
+                                 Py_hash_t hash, PyObject ***value_addr,
+                                 Py_ssize_t *hashpos);
 
 static int dictresize(PyDictObject *mp, Py_ssize_t minused);
 
-/* Dictionary reuse scheme to save calls to malloc, free, and memset */
+/*Global counter used to set ma_version_tag field of dictionary.
+ * It is incremented each time that a dictionary is created and each
+ * time that a dictionary is modified. */
+static uint64_t pydict_global_version = 0;
+
+#define DICT_NEXT_VERSION() (++pydict_global_version)
+
+/* Dictionary reuse scheme to save calls to malloc and free */
 #ifndef PyDict_MAXFREELIST
 #define PyDict_MAXFREELIST 80
 #endif
 static PyDictObject *free_list[PyDict_MAXFREELIST];
 static int numfree = 0;
+static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
+static int numfreekeys = 0;
 
 #include "clinic/dictobject.c.h"
 
@@ -219,12 +259,15 @@ int
 PyDict_ClearFreeList(void)
 {
     PyDictObject *op;
-    int ret = numfree;
+    int ret = numfree + numfreekeys;
     while (numfree) {
         op = free_list[--numfree];
         assert(PyDict_CheckExact(op));
         PyObject_GC_Del(op);
     }
+    while (numfreekeys) {
+        PyObject_FREE(keys_free_list[--numfreekeys]);
+    }
     return ret;
 }
 
@@ -243,40 +286,116 @@ PyDict_Fini(void)
     PyDict_ClearFreeList();
 }
 
+#define DK_SIZE(dk) ((dk)->dk_size)
+#if SIZEOF_VOID_P > 4
+#define DK_IXSIZE(dk)                          \
+    (DK_SIZE(dk) <= 0xff ?                     \
+        1 : DK_SIZE(dk) <= 0xffff ?            \
+            2 : DK_SIZE(dk) <= 0xffffffff ?    \
+                4 : sizeof(int64_t))
+#else
+#define DK_IXSIZE(dk)                          \
+    (DK_SIZE(dk) <= 0xff ?                     \
+        1 : DK_SIZE(dk) <= 0xffff ?            \
+            2 : sizeof(int32_t))
+#endif
+#define DK_ENTRIES(dk) \
+    ((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)]))
+
 #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
 #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
 
 #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt)
 #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk)
-#define DK_SIZE(dk) ((dk)->dk_size)
 #define DK_MASK(dk) (((dk)->dk_size)-1)
 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
 
+/* lookup indices.  returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
+static inline Py_ssize_t
+dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
+{
+    Py_ssize_t s = DK_SIZE(keys);
+    Py_ssize_t ix;
+
+    if (s <= 0xff) {
+        int8_t *indices = keys->dk_indices.as_1;
+        ix = indices[i];
+    }
+    else if (s <= 0xffff) {
+        int16_t *indices = keys->dk_indices.as_2;
+        ix = indices[i];
+    }
+#if SIZEOF_VOID_P > 4
+    else if (s > 0xffffffff) {
+        int64_t *indices = keys->dk_indices.as_8;
+        ix = indices[i];
+    }
+#endif
+    else {
+        int32_t *indices = keys->dk_indices.as_4;
+        ix = indices[i];
+    }
+    assert(ix >= DKIX_DUMMY);
+    return ix;
+}
+
+/* write to indices. */
+static inline void
+dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
+{
+    Py_ssize_t s = DK_SIZE(keys);
+
+    assert(ix >= DKIX_DUMMY);
+
+    if (s <= 0xff) {
+        int8_t *indices = keys->dk_indices.as_1;
+        assert(ix <= 0x7f);
+        indices[i] = (char)ix;
+    }
+    else if (s <= 0xffff) {
+        int16_t *indices = keys->dk_indices.as_2;
+        assert(ix <= 0x7fff);
+        indices[i] = (int16_t)ix;
+    }
+#if SIZEOF_VOID_P > 4
+    else if (s > 0xffffffff) {
+        int64_t *indices = keys->dk_indices.as_8;
+        indices[i] = ix;
+    }
+#endif
+    else {
+        int32_t *indices = keys->dk_indices.as_4;
+        assert(ix <= 0x7fffffff);
+        indices[i] = (int32_t)ix;
+    }
+}
+
+
 /* USABLE_FRACTION is the maximum dictionary load.
- * Currently set to (2n+1)/3. Increasing this ratio makes dictionaries more
- * dense resulting in more collisions.  Decreasing it improves sparseness
- * at the expense of spreading entries over more cache lines and at the
- * cost of total memory consumed.
+ * Increasing this ratio makes dictionaries more dense resulting in more
+ * collisions.  Decreasing it improves sparseness at the expense of spreading
+ * indices over more cache lines and at the cost of total memory consumed.
  *
  * USABLE_FRACTION must obey the following:
  *     (0 < USABLE_FRACTION(n) < n) for all n >= 2
  *
- * USABLE_FRACTION should be very quick to calculate.
- * Fractions around 5/8 to 2/3 seem to work well in practice.
+ * USABLE_FRACTION should be quick to calculate.
+ * Fractions around 1/2 to 2/3 seem to work well in practice.
  */
+#define USABLE_FRACTION(n) (((n) << 1)/3)
 
-/* Use (2n+1)/3 rather than 2n+3 because: it makes no difference for
- * combined tables (the two fractions round to the same number n < ),
- * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite
- * a lot of space for small, split tables */
-#define USABLE_FRACTION(n) ((((n) << 1)+1)/3)
+/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
+ * This can be used to reserve enough size to insert n entries without
+ * resizing.
+ */
+#define ESTIMATE_SIZE(n)  (((n)*3+1) >> 1)
 
-/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make
+/* Alternative fraction that is otherwise close enough to 2n/3 to make
  * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
  * 32 * 2/3 = 21, 32 * 5/8 = 20.
  * Its advantage is that it is faster to compute on machines with slow division.
  * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
-*/
+ */
 
 /* GROWTH_RATE. Growth rate upon hitting maximum load.
  * Currently set to used*2 + capacity/2.
@@ -300,61 +419,155 @@ PyDict_Fini(void)
  * (which cannot fail and thus can do no allocation).
  */
 static PyDictKeysObject empty_keys_struct = {
-        2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */
+        1, /* dk_refcnt */
         1, /* dk_size */
         lookdict_split, /* dk_lookup */
         0, /* dk_usable (immutable) */
-        {
-            { 0, 0, 0 } /* dk_entries (empty) */
-        }
+        0, /* dk_nentries */
+        .dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
+                                 DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}},
 };
 
 static PyObject *empty_values[1] = { NULL };
 
 #define Py_EMPTY_KEYS &empty_keys_struct
 
+/* Uncomment to check the dict content in _PyDict_CheckConsistency() */
+/* #define DEBUG_PYDICT */
+
+
+#ifdef Py_DEBUG
+static int
+_PyDict_CheckConsistency(PyDictObject *mp)
+{
+    PyDictKeysObject *keys = mp->ma_keys;
+    int splitted = _PyDict_HasSplitTable(mp);
+    Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
+#ifdef DEBUG_PYDICT
+    PyDictKeyEntry *entries = DK_ENTRIES(keys);
+    Py_ssize_t i;
+#endif
+
+    assert(0 <= mp->ma_used && mp->ma_used <= usable);
+    assert(IS_POWER_OF_2(keys->dk_size));
+    assert(0 <= keys->dk_usable
+           && keys->dk_usable <= usable);
+    assert(0 <= keys->dk_nentries
+           && keys->dk_nentries <= usable);
+    assert(keys->dk_usable + keys->dk_nentries <= usable);
+
+    if (!splitted) {
+        /* combined table */
+        assert(keys->dk_refcnt == 1);
+    }
+
+#ifdef DEBUG_PYDICT
+    for (i=0; i < keys->dk_size; i++) {
+        Py_ssize_t ix = dk_get_index(keys, i);
+        assert(DKIX_DUMMY <= ix && ix <= usable);
+    }
+
+    for (i=0; i < usable; i++) {
+        PyDictKeyEntry *entry = &entries[i];
+        PyObject *key = entry->me_key;
+
+        if (key != NULL) {
+            if (PyUnicode_CheckExact(key)) {
+                Py_hash_t hash = ((PyASCIIObject *)key)->hash;
+                assert(hash != -1);
+                assert(entry->me_hash == hash);
+            }
+            else {
+                /* test_dict fails if PyObject_Hash() is called again */
+                assert(entry->me_hash != -1);
+            }
+            if (!splitted) {
+                assert(entry->me_value != NULL);
+            }
+        }
+
+        if (splitted) {
+            assert(entry->me_value == NULL);
+        }
+    }
+
+    if (splitted) {
+        /* splitted table */
+        for (i=0; i < mp->ma_used; i++) {
+            assert(mp->ma_values[i] != NULL);
+        }
+    }
+#endif
+
+    return 1;
+}
+#endif
+
+
 static PyDictKeysObject *new_keys_object(Py_ssize_t size)
 {
     PyDictKeysObject *dk;
-    Py_ssize_t i;
-    PyDictKeyEntry *ep0;
+    Py_ssize_t es, usable;
 
-    assert(size >= PyDict_MINSIZE_SPLIT);
+    assert(size >= PyDict_MINSIZE);
     assert(IS_POWER_OF_2(size));
-    dk = PyMem_MALLOC(sizeof(PyDictKeysObject) +
-                      sizeof(PyDictKeyEntry) * (size-1));
-    if (dk == NULL) {
-        PyErr_NoMemory();
-        return NULL;
+
+    usable = USABLE_FRACTION(size);
+    if (size <= 0xff) {
+        es = 1;
+    }
+    else if (size <= 0xffff) {
+        es = 2;
+    }
+#if SIZEOF_VOID_P > 4
+    else if (size <= 0xffffffff) {
+        es = 4;
+    }
+#endif
+    else {
+        es = sizeof(Py_ssize_t);
+    }
+
+    if (size == PyDict_MINSIZE && numfreekeys > 0) {
+        dk = keys_free_list[--numfreekeys];
+    }
+    else {
+        dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
+                             - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+                             + es * size
+                             + sizeof(PyDictKeyEntry) * usable);
+        if (dk == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
     }
     DK_DEBUG_INCREF dk->dk_refcnt = 1;
     dk->dk_size = size;
-    dk->dk_usable = USABLE_FRACTION(size);
-    ep0 = &dk->dk_entries[0];
-    /* Hash value of slot 0 is used by popitem, so it must be initialized */
-    ep0->me_hash = 0;
-    for (i = 0; i < size; i++) {
-        ep0[i].me_key = NULL;
-        ep0[i].me_value = NULL;
-    }
+    dk->dk_usable = usable;
     dk->dk_lookup = lookdict_unicode_nodummy;
+    dk->dk_nentries = 0;
+    memset(&dk->dk_indices.as_1[0], 0xff, es * size);
+    memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
     return dk;
 }
 
 static void
 free_keys_object(PyDictKeysObject *keys)
 {
-    PyDictKeyEntry *entries = &keys->dk_entries[0];
+    PyDictKeyEntry *entries = DK_ENTRIES(keys);
     Py_ssize_t i, n;
-    for (i = 0, n = DK_SIZE(keys); i < n; i++) {
+    for (i = 0, n = keys->dk_nentries; i < n; i++) {
         Py_XDECREF(entries[i].me_key);
         Py_XDECREF(entries[i].me_value);
     }
-    PyMem_FREE(keys);
+    if (keys->dk_size == PyDict_MINSIZE && numfreekeys < PyDict_MAXFREELIST) {
+        keys_free_list[numfreekeys++] = keys;
+        return;
+    }
+    PyObject_FREE(keys);
 }
 
 #define new_values(size) PyMem_NEW(PyObject *, size)
-
 #define free_values(values) PyMem_FREE(values)
 
 /* Consumes a reference to the keys object */
@@ -380,6 +593,8 @@ new_dict(PyDictKeysObject *keys, PyObject **values)
     mp->ma_keys = keys;
     mp->ma_values = values;
     mp->ma_used = 0;
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    assert(_PyDict_CheckConsistency(mp));
     return (PyObject *)mp;
 }
 
@@ -390,7 +605,7 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
     PyObject **values;
     Py_ssize_t i, size;
 
-    size = DK_SIZE(keys);
+    size = USABLE_FRACTION(DK_SIZE(keys));
     values = new_values(size);
     if (values == NULL) {
         DK_DECREF(keys);
@@ -405,12 +620,44 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
 PyObject *
 PyDict_New(void)
 {
-    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
     if (keys == NULL)
         return NULL;
     return new_dict(keys, NULL);
 }
 
+/* Search index of hash table from offset of entry table */
+static Py_ssize_t
+lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
+{
+    size_t i;
+    size_t mask = DK_MASK(k);
+    Py_ssize_t ix;
+
+    i = (size_t)hash & mask;
+    ix = dk_get_index(k, i);
+    if (ix == index) {
+        return i;
+    }
+    if (ix == DKIX_EMPTY) {
+        return DKIX_EMPTY;
+    }
+
+    for (size_t perturb = hash;;) {
+        perturb >>= PERTURB_SHIFT;
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(k, i);
+        if (ix == index) {
+            return i;
+        }
+        if (ix == DKIX_EMPTY) {
+            return DKIX_EMPTY;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return DKIX_ERROR;
+}
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -426,52 +673,66 @@ The details in this version are due to Tim Peters, building on many past
 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
 Christian Tismer.
 
-lookdict() is general-purpose, and may return NULL if (and only if) a
-comparison raises an exception (this was new in Python 2.5).
+lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
+comparison raises an exception.
 lookdict_unicode() below is specialized to string keys, comparison of which can
-never raise an exception; that function can never return NULL.
+never raise an exception; that function can never return DKIX_ERROR.
 lookdict_unicode_nodummy is further specialized for string keys that cannot be
 the <dummy> value.
-For both, when the key isn't found a PyDictEntry* is returned
-where the key would have been found, *value_addr points to the matching value
-slot.
+For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns
+where the key index should be inserted.
 */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict(PyDictObject *mp, PyObject *key,
-         Py_hash_t hash, PyObject ***value_addr)
+         Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
-    PyDictKeyEntry *freeslot;
-    size_t mask;
-    PyDictKeyEntry *ep0;
-    PyDictKeyEntry *ep;
+    size_t i, mask;
+    Py_ssize_t ix, freeslot;
     int cmp;
+    PyDictKeysObject *dk;
+    PyDictKeyEntry *ep0, *ep;
     PyObject *startkey;
 
 top:
-    mask = DK_MASK(mp->ma_keys);
-    ep0 = &mp->ma_keys->dk_entries[0];
+    dk = mp->ma_keys;
+    mask = DK_MASK(dk);
+    ep0 = DK_ENTRIES(dk);
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key) {
-        *value_addr = &ep->me_value;
-        return ep;
+
+    ix = dk_get_index(dk, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    if (ix == DKIX_DUMMY) {
+        freeslot = i;
     }
-    if (ep->me_key == dummy)
-        freeslot = ep;
     else {
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL);
+        if (ep->me_key == key) {
+            *value_addr = &ep->me_value;
+            if (hashpos != NULL)
+                *hashpos = i;
+            return ix;
+        }
         if (ep->me_hash == hash) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
-            if (cmp < 0)
-                return NULL;
-            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+            if (cmp < 0) {
+                *value_addr = NULL;
+                return DKIX_ERROR;
+            }
+            if (dk == mp->ma_keys && ep->me_key == startkey) {
                 if (cmp > 0) {
                     *value_addr = &ep->me_value;
-                    return ep;
+                    if (hashpos != NULL)
+                        *hashpos = i;
+                    return ix;
                 }
             }
             else {
@@ -479,40 +740,50 @@ top:
                 goto top;
             }
         }
-        freeslot = NULL;
+        freeslot = -1;
     }
 
-    /* In the loop, me_key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        if (ep->me_key == NULL) {
-            if (freeslot == NULL) {
-                *value_addr = &ep->me_value;
-                return ep;
-            } else {
-                *value_addr = &freeslot->me_value;
-                return freeslot;
+    for (size_t perturb = hash;;) {
+        perturb >>= PERTURB_SHIFT;
+        i = ((i << 2) + i + perturb + 1) & mask;
+        ix = dk_get_index(dk, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL) {
+                *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
             }
+            *value_addr = NULL;
+            return ix;
         }
+        if (ix == DKIX_DUMMY) {
+            if (freeslot == -1)
+                freeslot = i;
+            continue;
+        }
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL);
         if (ep->me_key == key) {
+            if (hashpos != NULL) {
+                *hashpos = i;
+            }
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
-        if (ep->me_hash == hash && ep->me_key != dummy) {
+        if (ep->me_hash == hash) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
             if (cmp < 0) {
                 *value_addr = NULL;
-                return NULL;
+                return DKIX_ERROR;
             }
-            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+            if (dk == mp->ma_keys && ep->me_key == startkey) {
                 if (cmp > 0) {
+                    if (hashpos != NULL) {
+                        *hashpos = i;
+                    }
                     *value_addr = &ep->me_value;
-                    return ep;
+                    return ix;
                 }
             }
             else {
@@ -520,72 +791,80 @@ top:
                 goto top;
             }
         }
-        else if (ep->me_key == dummy && freeslot == NULL)
-            freeslot = ep;
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
 /* Specialized version for string-only keys */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_unicode(PyDictObject *mp, PyObject *key,
-                 Py_hash_t hash, PyObject ***value_addr)
+                 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
     size_t i;
-    size_t perturb;
-    PyDictKeyEntry *freeslot;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix, freeslot;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
 
+    assert(mp->ma_values == NULL);
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
         mp->ma_keys->dk_lookup = lookdict;
-        return lookdict(mp, key, hash, value_addr);
+        return lookdict(mp, key, hash, value_addr, hashpos);
     }
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key) {
-        *value_addr = &ep->me_value;
-        return ep;
+    ix = dk_get_index(mp->ma_keys, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    if (ix == DKIX_DUMMY) {
+        freeslot = i;
     }
-    if (ep->me_key == dummy)
-        freeslot = ep;
     else {
-        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL);
+        if (ep->me_key == key
+            || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+            if (hashpos != NULL)
+                *hashpos = i;
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
-        freeslot = NULL;
+        freeslot = -1;
     }
 
-    /* In the loop, me_key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        if (ep->me_key == NULL) {
-            if (freeslot == NULL) {
-                *value_addr = &ep->me_value;
-                return ep;
-            } else {
-                *value_addr = &freeslot->me_value;
-                return freeslot;
+    for (size_t perturb = hash;;) {
+        perturb >>= PERTURB_SHIFT;
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL) {
+                *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
             }
+            *value_addr = NULL;
+            return DKIX_EMPTY;
+        }
+        if (ix == DKIX_DUMMY) {
+            if (freeslot == -1)
+                freeslot = i;
+            continue;
         }
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL);
         if (ep->me_key == key
-            || (ep->me_hash == hash
-            && ep->me_key != dummy
-            && unicode_eq(ep->me_key, key))) {
+            || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
             *value_addr = &ep->me_value;
-            return ep;
+            if (hashpos != NULL) {
+                *hashpos = i;
+            }
+            return ix;
         }
-        if (ep->me_key == dummy && freeslot == NULL)
-            freeslot = ep;
     }
     assert(0);          /* NOT REACHED */
     return 0;
@@ -593,40 +872,63 @@ lookdict_unicode(PyDictObject *mp, PyObject *key,
 
 /* Faster version of lookdict_unicode when it is known that no <dummy> keys
  * will be present. */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
-                         Py_hash_t hash, PyObject ***value_addr)
+                         Py_hash_t hash, PyObject ***value_addr,
+                         Py_ssize_t *hashpos)
 {
     size_t i;
-    size_t perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
 
+    assert(mp->ma_values == NULL);
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
         mp->ma_keys->dk_lookup = lookdict;
-        return lookdict(mp, key, hash, value_addr);
+        return lookdict(mp, key, hash, value_addr, hashpos);
     }
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-    if (ep->me_key == NULL || ep->me_key == key ||
+    ix = dk_get_index(mp->ma_keys, i);
+    assert (ix != DKIX_DUMMY);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    ep = &ep0[ix];
+    assert(ep->me_key != NULL);
+    assert(PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == key ||
         (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+        if (hashpos != NULL)
+            *hashpos = i;
         *value_addr = &ep->me_value;
-        return ep;
-    }
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-        if (ep->me_key == NULL || ep->me_key == key ||
+        return ix;
+    }
+    for (size_t perturb = hash;;) {
+        perturb >>= PERTURB_SHIFT;
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        assert (ix != DKIX_DUMMY);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = NULL;
+            return DKIX_EMPTY;
+        }
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == key ||
             (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+            if (hashpos != NULL)
+                *hashpos = i;
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
     }
     assert(0);          /* NOT REACHED */
@@ -638,39 +940,62 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
  * Split tables only contain unicode keys and no dummy keys,
  * so algorithm is the same as lookdict_unicode_nodummy.
  */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_split(PyDictObject *mp, PyObject *key,
-               Py_hash_t hash, PyObject ***value_addr)
+               Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
     size_t i;
-    size_t perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
 
+    /* mp must split table */
+    assert(mp->ma_values != NULL);
     if (!PyUnicode_CheckExact(key)) {
-        ep = lookdict(mp, key, hash, value_addr);
-        /* lookdict expects a combined-table, so fix value_addr */
-        i = ep - ep0;
-        *value_addr = &mp->ma_values[i];
-        return ep;
+        ix = lookdict(mp, key, hash, value_addr, hashpos);
+        if (ix >= 0) {
+            *value_addr = &mp->ma_values[ix];
+        }
+        return ix;
     }
+
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-    if (ep->me_key == NULL || ep->me_key == key ||
+    ix = dk_get_index(mp->ma_keys, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    assert(ix >= 0);
+    ep = &ep0[ix];
+    assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == key ||
         (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
-        *value_addr = &mp->ma_values[i];
-        return ep;
-    }
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-        if (ep->me_key == NULL || ep->me_key == key ||
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = &mp->ma_values[ix];
+        return ix;
+    }
+    for (size_t perturb = hash;;) {
+        perturb >>= PERTURB_SHIFT;
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = NULL;
+            return DKIX_EMPTY;
+        }
+        assert(ix >= 0);
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == key ||
             (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
-            *value_addr = &mp->ma_values[i & mask];
-            return ep;
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = &mp->ma_values[ix];
+            return ix;
         }
     }
     assert(0);          /* NOT REACHED */
@@ -707,27 +1032,27 @@ _PyDict_MaybeUntrack(PyObject *op)
 {
     PyDictObject *mp;
     PyObject *value;
-    Py_ssize_t i, size;
+    Py_ssize_t i, numentries;
+    PyDictKeyEntry *ep0;
 
     if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
         return;
 
     mp = (PyDictObject *) op;
-    size = DK_SIZE(mp->ma_keys);
+    ep0 = DK_ENTRIES(mp->ma_keys);
+    numentries = mp->ma_keys->dk_nentries;
     if (_PyDict_HasSplitTable(mp)) {
-        for (i = 0; i < size; i++) {
+        for (i = 0; i < numentries; i++) {
             if ((value = mp->ma_values[i]) == NULL)
                 continue;
             if (_PyObject_GC_MAY_BE_TRACKED(value)) {
-                assert(!_PyObject_GC_MAY_BE_TRACKED(
-                    mp->ma_keys->dk_entries[i].me_key));
+                assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
                 return;
             }
         }
     }
     else {
-        PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-        for (i = 0; i < size; i++) {
+        for (i = 0; i < numentries; i++) {
             if ((value = ep0[i].me_value) == NULL)
                 continue;
             if (_PyObject_GC_MAY_BE_TRACKED(value) ||
@@ -739,33 +1064,35 @@ _PyDict_MaybeUntrack(PyObject *op)
 }
 
 /* Internal function to find slot for an item from its hash
- * when it is known that the key is not present in the dict.
- */
-static PyDictKeyEntry *
+   when it is known that the key is not present in the dict.
+
+   The dict must be combined. */
+static void
 find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
-                PyObject ***value_addr)
+                PyObject ***value_addr, Py_ssize_t *hashpos)
 {
     size_t i;
-    size_t perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
 
+    assert(!_PyDict_HasSplitTable(mp));
+    assert(hashpos != NULL);
     assert(key != NULL);
+
     if (!PyUnicode_CheckExact(key))
         mp->ma_keys->dk_lookup = lookdict;
     i = hash & mask;
-    ep = &ep0[i];
-    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
+    ix = dk_get_index(mp->ma_keys, i);
+    for (size_t perturb = hash; ix != DKIX_EMPTY;) {
+        perturb >>= PERTURB_SHIFT;
         i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
+        ix = dk_get_index(mp->ma_keys, i & mask);
     }
+    ep = &ep0[mp->ma_keys->dk_nentries];
+    *hashpos = i & mask;
     assert(ep->me_value == NULL);
-    if (mp->ma_values)
-        *value_addr = &mp->ma_values[i & mask];
-    else
-        *value_addr = &ep->me_value;
-    return ep;
+    *value_addr = &ep->me_value;
 }
 
 static int
@@ -784,58 +1111,88 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 {
     PyObject *old_value;
     PyObject **value_addr;
-    PyDictKeyEntry *ep;
-    assert(key != dummy);
+    PyDictKeyEntry *ep, *ep0;
+    Py_ssize_t hashpos, ix;
 
     if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
         if (insertion_resize(mp) < 0)
             return -1;
     }
 
-    ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
-    if (ep == NULL) {
+    ix = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR) {
         return -1;
     }
+
     assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
     Py_INCREF(value);
     MAINTAIN_TRACKING(mp, key, value);
-    old_value = *value_addr;
-    if (old_value != NULL) {
-        assert(ep->me_key != NULL && ep->me_key != dummy);
-        *value_addr = value;
-        Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
+
+    /* When insertion order is different from shared key, we can't share
+     * the key anymore.  Convert this instance to combine table.
+     */
+    if (_PyDict_HasSplitTable(mp) &&
+        ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+         (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+        if (insertion_resize(mp) < 0) {
+            Py_DECREF(value);
+            return -1;
+        }
+        find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        ix = DKIX_EMPTY;
     }
-    else {
-        if (ep->me_key == NULL) {
-            Py_INCREF(key);
-            if (mp->ma_keys->dk_usable <= 0) {
-                /* Need to resize. */
-                if (insertion_resize(mp) < 0) {
-                    Py_DECREF(key);
-                    Py_DECREF(value);
-                    return -1;
-                }
-                ep = find_empty_slot(mp, key, hash, &value_addr);
+
+    if (ix == DKIX_EMPTY) {
+        /* Insert into new slot. */
+        if (mp->ma_keys->dk_usable <= 0) {
+            /* Need to resize. */
+            if (insertion_resize(mp) < 0) {
+                Py_DECREF(value);
+                return -1;
             }
-            mp->ma_keys->dk_usable--;
-            assert(mp->ma_keys->dk_usable >= 0);
-            ep->me_key = key;
-            ep->me_hash = hash;
+            find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        }
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        ep = &ep0[mp->ma_keys->dk_nentries];
+        dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
+        Py_INCREF(key);
+        ep->me_key = key;
+        ep->me_hash = hash;
+        if (mp->ma_values) {
+            assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+            mp->ma_values[mp->ma_keys->dk_nentries] = value;
         }
         else {
-            if (ep->me_key == dummy) {
-                Py_INCREF(key);
-                ep->me_key = key;
-                ep->me_hash = hash;
-                Py_DECREF(dummy);
-            } else {
-                assert(_PyDict_HasSplitTable(mp));
-            }
+            ep->me_value = value;
         }
         mp->ma_used++;
+        mp->ma_version_tag = DICT_NEXT_VERSION();
+        mp->ma_keys->dk_usable--;
+        mp->ma_keys->dk_nentries++;
+        assert(mp->ma_keys->dk_usable >= 0);
+        assert(_PyDict_CheckConsistency(mp));
+        return 0;
+    }
+
+    assert(value_addr != NULL);
+
+    old_value = *value_addr;
+    if (old_value != NULL) {
         *value_addr = value;
-        assert(ep->me_key != NULL && ep->me_key != dummy);
+        mp->ma_version_tag = DICT_NEXT_VERSION();
+        assert(_PyDict_CheckConsistency(mp));
+
+        Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
+        return 0;
     }
+
+    /* pending state */
+    assert(_PyDict_HasSplitTable(mp));
+    assert(ix == mp->ma_used);
+    *value_addr = value;
+    mp->ma_used++;
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    assert(_PyDict_CheckConsistency(mp));
     return 0;
 }
 
@@ -854,24 +1211,24 @@ insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
                  PyObject *value)
 {
     size_t i;
-    size_t perturb;
     PyDictKeysObject *k = mp->ma_keys;
     size_t mask = (size_t)DK_SIZE(k)-1;
-    PyDictKeyEntry *ep0 = &k->dk_entries[0];
+    PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
     PyDictKeyEntry *ep;
 
     assert(k->dk_lookup != NULL);
     assert(value != NULL);
     assert(key != NULL);
-    assert(key != dummy);
     assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
     i = hash & mask;
-    ep = &ep0[i];
-    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
+    for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
+        perturb >>= PERTURB_SHIFT;
+        i = mask & ((i << 2) + i + perturb + 1);
     }
+    ep = &ep0[k->dk_nentries];
     assert(ep->me_value == NULL);
+    dk_set_index(k, i, k->dk_nentries);
+    k->dk_nentries++;
     ep->me_key = key;
     ep->me_hash = hash;
     ep->me_value = value;
@@ -888,16 +1245,16 @@ After resizing a table is always combined,
 but can be resplit by make_keys_shared().
 */
 static int
-dictresize(PyDictObject *mp, Py_ssize_t minused)
+dictresize(PyDictObject *mp, Py_ssize_t minsize)
 {
-    Py_ssize_t newsize;
+    Py_ssize_t i, newsize;
     PyDictKeysObject *oldkeys;
     PyObject **oldvalues;
-    Py_ssize_t i, oldsize;
+    PyDictKeyEntry *ep0;
 
-/* Find the smallest table size > minused. */
-    for (newsize = PyDict_MINSIZE_COMBINED;
-         newsize <= minused && newsize > 0;
+    /* Find the smallest table size > minused. */
+    for (newsize = PyDict_MINSIZE;
+         newsize < minsize && newsize > 0;
          newsize <<= 1)
         ;
     if (newsize <= 0) {
@@ -912,56 +1269,45 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
         mp->ma_keys = oldkeys;
         return -1;
     }
+    // New table must be large enough.
+    assert(mp->ma_keys->dk_usable >= mp->ma_used);
     if (oldkeys->dk_lookup == lookdict)
         mp->ma_keys->dk_lookup = lookdict;
-    oldsize = DK_SIZE(oldkeys);
     mp->ma_values = NULL;
-    /* If empty then nothing to copy so just return */
-    if (oldsize == 1) {
-        assert(oldkeys == Py_EMPTY_KEYS);
-        DK_DECREF(oldkeys);
-        return 0;
-    }
+    ep0 = DK_ENTRIES(oldkeys);
     /* Main loop below assumes we can transfer refcount to new keys
      * and that value is stored in me_value.
      * Increment ref-counts and copy values here to compensate
      * This (resizing a split table) should be relatively rare */
     if (oldvalues != NULL) {
-        for (i = 0; i < oldsize; i++) {
+        for (i = 0; i < oldkeys->dk_nentries; i++) {
             if (oldvalues[i] != NULL) {
-                Py_INCREF(oldkeys->dk_entries[i].me_key);
-                oldkeys->dk_entries[i].me_value = oldvalues[i];
+                Py_INCREF(ep0[i].me_key);
+                ep0[i].me_value = oldvalues[i];
             }
         }
     }
     /* Main loop */
-    for (i = 0; i < oldsize; i++) {
-        PyDictKeyEntry *ep = &oldkeys->dk_entries[i];
+    for (i = 0; i < oldkeys->dk_nentries; i++) {
+        PyDictKeyEntry *ep = &ep0[i];
         if (ep->me_value != NULL) {
-            assert(ep->me_key != dummy);
             insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
         }
     }
     mp->ma_keys->dk_usable -= mp->ma_used;
     if (oldvalues != NULL) {
         /* NULL out me_value slot in oldkeys, in case it was shared */
-        for (i = 0; i < oldsize; i++)
-            oldkeys->dk_entries[i].me_value = NULL;
-        assert(oldvalues != empty_values);
-        free_values(oldvalues);
+        for (i = 0; i < oldkeys->dk_nentries; i++)
+            ep0[i].me_value = NULL;
         DK_DECREF(oldkeys);
+        if (oldvalues != empty_values) {
+            free_values(oldvalues);
+        }
     }
     else {
         assert(oldkeys->dk_lookup != lookdict_split);
-        if (oldkeys->dk_lookup != lookdict_unicode_nodummy) {
-            PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0];
-            for (i = 0; i < oldsize; i++) {
-                if (ep0[i].me_key == dummy)
-                    Py_DECREF(dummy);
-            }
-        }
         assert(oldkeys->dk_refcnt == 1);
-        DK_DEBUG_DECREF PyMem_FREE(oldkeys);
+        DK_DEBUG_DECREF PyObject_FREE(oldkeys);
     }
     return 0;
 }
@@ -985,16 +1331,14 @@ make_keys_shared(PyObject *op)
             return NULL;
         }
         else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
-            /* Remove dummy keys
-             * -1 is required since dictresize() uses key size > minused
-             */
-            if (dictresize(mp, DK_SIZE(mp->ma_keys) - 1))
+            /* Remove dummy keys */
+            if (dictresize(mp, DK_SIZE(mp->ma_keys)))
                 return NULL;
         }
         assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
         /* Copy values into a new array */
-        ep0 = &mp->ma_keys->dk_entries[0];
-        size = DK_SIZE(mp->ma_keys);
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
         values = new_values(size);
         if (values == NULL) {
             PyErr_SetString(PyExc_MemoryError,
@@ -1015,12 +1359,26 @@ make_keys_shared(PyObject *op)
 PyObject *
 _PyDict_NewPresized(Py_ssize_t minused)
 {
+    const Py_ssize_t max_presize = 128 * 1024;
     Py_ssize_t newsize;
     PyDictKeysObject *new_keys;
-    for (newsize = PyDict_MINSIZE_COMBINED;
-         newsize <= minused && newsize > 0;
-         newsize <<= 1)
-        ;
+
+    /* There are no strict guarantee that returned dict can contain minused
+     * items without resize.  So we create medium size dict instead of very
+     * large dict or MemoryError.
+     */
+    if (minused > USABLE_FRACTION(max_presize)) {
+        newsize = max_presize;
+    }
+    else {
+        Py_ssize_t minsize = ESTIMATE_SIZE(minused);
+        newsize = PyDict_MINSIZE;
+        while (newsize < minsize) {
+            newsize <<= 1;
+        }
+    }
+    assert(IS_POWER_OF_2(newsize));
+
     new_keys = new_keys_object(newsize);
     if (new_keys == NULL)
         return NULL;
@@ -1041,8 +1399,8 @@ PyObject *
 PyDict_GetItem(PyObject *op, PyObject *key)
 {
     Py_hash_t hash;
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyThreadState *tstate;
     PyObject **value_addr;
 
@@ -1068,15 +1426,15 @@ PyDict_GetItem(PyObject *op, PyObject *key)
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
         PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
         /* ignore errors */
         PyErr_Restore(err_type, err_value, err_tb);
-        if (ep == NULL)
+        if (ix < 0)
             return NULL;
     }
     else {
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-        if (ep == NULL) {
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+        if (ix < 0) {
             PyErr_Clear();
             return NULL;
         }
@@ -1084,39 +1442,25 @@ PyDict_GetItem(PyObject *op, PyObject *key)
     return *value_addr;
 }
 
+/* Same as PyDict_GetItemWithError() but with hash supplied by caller.
+   This returns NULL *with* an exception set if an exception occurred.
+   It returns NULL *without* an exception set if the key wasn't present.
+*/
 PyObject *
 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
 {
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
-    PyThreadState *tstate;
     PyObject **value_addr;
 
-    if (!PyDict_Check(op))
+    if (!PyDict_Check(op)) {
+        PyErr_BadInternalCall();
         return NULL;
-
-    /* We can arrive here with a NULL tstate during initialization: try
-       running "python -Wi" for an example related to string interning.
-       Let's just hope that no exception occurs then...  This must be
-       _PyThreadState_Current and not PyThreadState_GET() because in debug
-       mode, the latter complains if tstate is NULL. */
-    tstate = _PyThreadState_UncheckedGet();
-    if (tstate != NULL && tstate->curexc_type != NULL) {
-        /* preserve the existing exception */
-        PyObject *err_type, *err_value, *err_tb;
-        PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-        /* ignore errors */
-        PyErr_Restore(err_type, err_value, err_tb);
-        if (ep == NULL)
-            return NULL;
     }
-    else {
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-        if (ep == NULL) {
-            PyErr_Clear();
-            return NULL;
-        }
+
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix < 0) {
+        return NULL;
     }
     return *value_addr;
 }
@@ -1128,9 +1472,9 @@ _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
 PyObject *
 PyDict_GetItemWithError(PyObject *op, PyObject *key)
 {
+    Py_ssize_t ix;
     Py_hash_t hash;
     PyDictObject*mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
@@ -1146,8 +1490,8 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
         }
     }
 
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix < 0)
         return NULL;
     return *value_addr;
 }
@@ -1162,39 +1506,40 @@ _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
     return PyDict_GetItemWithError(dp, kv);
 }
 
-/* Fast version of global value lookup.
+/* Fast version of global value lookup (LOAD_GLOBAL).
  * Lookup in globals, then builtins.
+ *
+ * Raise an exception and return NULL if an error occurred (ex: computing the
+ * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
+ * exist. Return the value if the key exists.
  */
 PyObject *
 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
 {
-    PyObject *x;
-    if (PyUnicode_CheckExact(key)) {
-        PyObject **value_addr;
-        Py_hash_t hash = ((PyASCIIObject *)key)->hash;
-        if (hash != -1) {
-            PyDictKeyEntry *e;
-            e = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr);
-            if (e == NULL) {
-                return NULL;
-            }
-            x = *value_addr;
-            if (x != NULL)
-                return x;
-            e = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr);
-            if (e == NULL) {
-                return NULL;
-            }
-            x = *value_addr;
-            return x;
-        }
+    Py_ssize_t ix;
+    Py_hash_t hash;
+    PyObject **value_addr;
+
+    if (!PyUnicode_CheckExact(key) ||
+        (hash = ((PyASCIIObject *) key)->hash) == -1)
+    {
+        hash = PyObject_Hash(key);
+        if (hash == -1)
+            return NULL;
     }
-    x = PyDict_GetItemWithError((PyObject *)globals, key);
-    if (x != NULL)
-        return x;
-    if (PyErr_Occurred())
+
+    /* namespace 1: globals */
+    ix = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    return PyDict_GetItemWithError((PyObject *)builtins, key);
+    if (ix != DKIX_EMPTY && *value_addr != NULL)
+        return *value_addr;
+
+    /* namespace 2: builtins */
+    ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr, NULL);
+    if (ix < 0)
+        return NULL;
+    return *value_addr;
 }
 
 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -1247,36 +1592,33 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
 }
 
 static int
-delitem_common(PyDictObject *mp, PyDictKeyEntry *ep, PyObject **value_addr)
+delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix,
+               PyObject **value_addr)
 {
     PyObject *old_key, *old_value;
+    PyDictKeyEntry *ep;
 
     old_value = *value_addr;
+    assert(old_value != NULL);
     *value_addr = NULL;
     mp->ma_used--;
-    if (!_PyDict_HasSplitTable(mp)) {
-        ENSURE_ALLOWS_DELETIONS(mp);
-        old_key = ep->me_key;
-        Py_INCREF(dummy);
-        ep->me_key = dummy;
-        Py_DECREF(old_key);
-    }
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    ep = &DK_ENTRIES(mp->ma_keys)[ix];
+    dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+    ENSURE_ALLOWS_DELETIONS(mp);
+    old_key = ep->me_key;
+    ep->me_key = NULL;
+    Py_DECREF(old_key);
     Py_DECREF(old_value);
+
+    assert(_PyDict_CheckConsistency(mp));
     return 0;
 }
 
 int
 PyDict_DelItem(PyObject *op, PyObject *key)
 {
-    PyDictObject *mp;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
-    PyObject **value_addr;
-
-    if (!PyDict_Check(op)) {
-        PyErr_BadInternalCall();
-        return -1;
-    }
     assert(key);
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1284,22 +1626,15 @@ PyDict_DelItem(PyObject *op, PyObject *key)
         if (hash == -1)
             return -1;
     }
-    mp = (PyDictObject *)op;
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
-        return -1;
-    if (*value_addr == NULL) {
-        _PyErr_SetKeyError(key);
-        return -1;
-    }
-    return delitem_common(mp, ep, value_addr);
+
+    return _PyDict_DelItem_KnownHash(op, key, hash);
 }
 
 int
 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
 {
+    Py_ssize_t hashpos, ix;
     PyDictObject *mp;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
@@ -1309,23 +1644,37 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
     assert(key);
     assert(hash != -1);
     mp = (PyDictObject *)op;
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return -1;
-    if (*value_addr == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         _PyErr_SetKeyError(key);
         return -1;
     }
-    return delitem_common(mp, ep, value_addr);
+    assert(dk_get_index(mp->ma_keys, hashpos) == ix);
+
+    // Split table doesn't allow deletion.  Combine it.
+    if (_PyDict_HasSplitTable(mp)) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+            return -1;
+        }
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+        assert(ix >= 0);
+    }
+    return delitem_common(mp, hashpos, ix, value_addr);
 }
 
+/* This function promises that the predicate -> deletion sequence is atomic
+ * (i.e. protected by the GIL), assuming the predicate itself doesn't
+ * release the GIL.
+ */
 int
 _PyDict_DelItemIf(PyObject *op, PyObject *key,
                   int (*predicate)(PyObject *value))
 {
+    Py_ssize_t hashpos, ix;
     PyDictObject *mp;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
     int res;
 
@@ -1338,18 +1687,29 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
     if (hash == -1)
         return -1;
     mp = (PyDictObject *)op;
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return -1;
-    if (*value_addr == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         _PyErr_SetKeyError(key);
         return -1;
     }
+    assert(dk_get_index(mp->ma_keys, hashpos) == ix);
+
+    // Split table doesn't allow deletion.  Combine it.
+    if (_PyDict_HasSplitTable(mp)) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+            return -1;
+        }
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+        assert(ix >= 0);
+    }
+
     res = predicate(*value_addr);
     if (res == -1)
         return -1;
     if (res > 0)
-        return delitem_common(mp, ep, value_addr);
+        return delitem_common(mp, hashpos, ix, value_addr);
     else
         return 0;
 }
@@ -1375,9 +1735,10 @@ PyDict_Clear(PyObject *op)
     mp->ma_keys = Py_EMPTY_KEYS;
     mp->ma_values = empty_values;
     mp->ma_used = 0;
+    mp->ma_version_tag = DICT_NEXT_VERSION();
     /* ...then clear the keys and values */
     if (oldvalues != NULL) {
-        n = DK_SIZE(oldkeys);
+        n = oldkeys->dk_nentries;
         for (i = 0; i < n; i++)
             Py_CLEAR(oldvalues[i]);
         free_values(oldvalues);
@@ -1387,42 +1748,59 @@ PyDict_Clear(PyObject *op)
        assert(oldkeys->dk_refcnt == 1);
        DK_DECREF(oldkeys);
     }
+    assert(_PyDict_CheckConsistency(mp));
 }
 
-/* Returns -1 if no more items (or op is not a dict),
- * index of item otherwise. Stores value in pvalue
+/* Internal version of PyDict_Next that returns a hash value in addition
+ * to the key and value.
+ * Return 1 on success, return 0 when the reached the end of the dictionary
+ * (or if op is not a dictionary)
  */
-Py_LOCAL_INLINE(Py_ssize_t)
-dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
+int
+_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
+             PyObject **pvalue, Py_hash_t *phash)
 {
-    Py_ssize_t mask, offset;
+    Py_ssize_t i, n;
     PyDictObject *mp;
-    PyObject **value_ptr;
-
+    PyDictKeyEntry *entry_ptr;
+    PyObject *value;
 
     if (!PyDict_Check(op))
-        return -1;
+        return 0;
     mp = (PyDictObject *)op;
-    if (i < 0)
-        return -1;
+    i = *ppos;
+    n = mp->ma_keys->dk_nentries;
+    if ((size_t)i >= (size_t)n)
+        return 0;
     if (mp->ma_values) {
-        value_ptr = &mp->ma_values[i];
-        offset = sizeof(PyObject *);
+        PyObject **value_ptr = &mp->ma_values[i];
+        while (i < n && *value_ptr == NULL) {
+            value_ptr++;
+            i++;
+        }
+        if (i >= n)
+            return 0;
+        entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
+        value = *value_ptr;
     }
     else {
-        value_ptr = &mp->ma_keys->dk_entries[i].me_value;
-        offset = sizeof(PyDictKeyEntry);
-    }
-    mask = DK_MASK(mp->ma_keys);
-    while (i <= mask && *value_ptr == NULL) {
-        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
-        i++;
+        entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
+        while (i < n && entry_ptr->me_value == NULL) {
+            entry_ptr++;
+            i++;
+        }
+        if (i >= n)
+            return 0;
+        value = entry_ptr->me_value;
     }
-    if (i > mask)
-        return -1;
+    *ppos = i+1;
+    if (pkey)
+        *pkey = entry_ptr->me_key;
+    if (phash)
+        *phash = entry_ptr->me_hash;
     if (pvalue)
-        *pvalue = *value_ptr;
-    return i;
+        *pvalue = value;
+    return 1;
 }
 
 /*
@@ -1432,9 +1810,12 @@ dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
  *     PyObject *key, *value;
  *     i = 0;   # important!  i should not otherwise be changed by you
  *     while (PyDict_Next(yourdict, &i, &key, &value)) {
- *              Refer to borrowed references in key and value.
+ *         Refer to borrowed references in key and value.
  *     }
  *
+ * Return 1 on success, return 0 when the reached the end of the dictionary
+ * (or if op is not a dictionary)
+ *
  * CAUTION:  In general, it isn't safe to use PyDict_Next in a loop that
  * mutates the dict.  One exception:  it is safe if the loop merely changes
  * the values associated with the keys (but doesn't insert new keys or
@@ -1443,43 +1824,21 @@ dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
 int
 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 {
-    PyDictObject *mp;
-    Py_ssize_t i = dict_next(op, *ppos, pvalue);
-    if (i < 0)
-        return 0;
-    mp = (PyDictObject *)op;
-    *ppos = i+1;
-    if (pkey)
-        *pkey = mp->ma_keys->dk_entries[i].me_key;
-    return 1;
-}
-
-/* Internal version of PyDict_Next that returns a hash value in addition
- * to the key and value.
- */
-int
-_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
-             PyObject **pvalue, Py_hash_t *phash)
-{
-    PyDictObject *mp;
-    Py_ssize_t i = dict_next(op, *ppos, pvalue);
-    if (i < 0)
-        return 0;
-    mp = (PyDictObject *)op;
-    *ppos = i+1;
-    *phash = mp->ma_keys->dk_entries[i].me_hash;
-    if (pkey)
-        *pkey = mp->ma_keys->dk_entries[i].me_key;
-    return 1;
+    return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
 }
 
 /* Internal version of dict.pop(). */
 PyObject *
-_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *deflt)
+_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
 {
+    Py_ssize_t ix, hashpos;
     PyObject *old_value, *old_key;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
+    PyDictObject *mp;
+
+    assert(PyDict_Check(dict));
+    mp = (PyDictObject *)dict;
 
     if (mp->ma_used == 0) {
         if (deflt) {
@@ -1489,11 +1848,10 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject
         _PyErr_SetKeyError(key);
         return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return NULL;
-    old_value = *value_addr;
-    if (old_value == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
@@ -1501,24 +1859,38 @@ _PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject
         _PyErr_SetKeyError(key);
         return NULL;
     }
+
+    // Split table doesn't allow deletion.  Combine it.
+    if (_PyDict_HasSplitTable(mp)) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+            return NULL;
+        }
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+        assert(ix >= 0);
+    }
+
+    old_value = *value_addr;
+    assert(old_value != NULL);
     *value_addr = NULL;
     mp->ma_used--;
-    if (!_PyDict_HasSplitTable(mp)) {
-        ENSURE_ALLOWS_DELETIONS(mp);
-        old_key = ep->me_key;
-        Py_INCREF(dummy);
-        ep->me_key = dummy;
-        Py_DECREF(old_key);
-    }
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+    ep = &DK_ENTRIES(mp->ma_keys)[ix];
+    ENSURE_ALLOWS_DELETIONS(mp);
+    old_key = ep->me_key;
+    ep->me_key = NULL;
+    Py_DECREF(old_key);
+
+    assert(_PyDict_CheckConsistency(mp));
     return old_value;
 }
 
 PyObject *
-_PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
+_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
 {
     Py_hash_t hash;
 
-    if (mp->ma_used == 0) {
+    if (((PyDictObject *)dict)->ma_used == 0) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
@@ -1532,7 +1904,7 @@ _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
         if (hash == -1)
             return NULL;
     }
-    return _PyDict_Pop_KnownHash(mp, key, hash, deflt);
+    return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
 }
 
 /* Internal version of dict.from_keys().  It is subclass-friendly. */
@@ -1556,7 +1928,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, Py_SIZE(iterable))) {
+            if (dictresize(mp, ESTIMATE_SIZE(Py_SIZE(iterable)))) {
                 Py_DECREF(d);
                 return NULL;
             }
@@ -1575,7 +1947,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, PySet_GET_SIZE(iterable))) {
+            if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
                 Py_DECREF(d);
                 return NULL;
             }
@@ -1635,7 +2007,7 @@ dict_dealloc(PyDictObject *mp)
     Py_TRASHCAN_SAFE_BEGIN(mp)
     if (values != NULL) {
         if (values != empty_values) {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
                 Py_XDECREF(values[i]);
             }
             free_values(values);
@@ -1747,8 +2119,8 @@ static PyObject *
 dict_subscript(PyDictObject *mp, PyObject *key)
 {
     PyObject *v;
+    Py_ssize_t ix;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
@@ -1757,11 +2129,10 @@ dict_subscript(PyDictObject *mp, PyObject *key)
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    v = *value_addr;
-    if (v == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         if (!PyDict_CheckExact(mp)) {
             /* Look up __missing__ method if we're a subclass. */
             PyObject *missing, *res;
@@ -1779,8 +2150,8 @@ dict_subscript(PyDictObject *mp, PyObject *key)
         _PyErr_SetKeyError(key);
         return NULL;
     }
-    else
-        Py_INCREF(v);
+    v = *value_addr;
+    Py_INCREF(v);
     return v;
 }
 
@@ -1820,8 +2191,8 @@ dict_keys(PyDictObject *mp)
         Py_DECREF(v);
         goto again;
     }
-    ep = &mp->ma_keys->dk_entries[0];
-    size = DK_SIZE(mp->ma_keys);
+    ep = DK_ENTRIES(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
@@ -1848,6 +2219,7 @@ dict_values(PyDictObject *mp)
 {
     PyObject *v;
     Py_ssize_t i, j;
+    PyDictKeyEntry *ep;
     Py_ssize_t size, n, offset;
     PyObject **value_ptr;
 
@@ -1863,13 +2235,14 @@ dict_values(PyDictObject *mp)
         Py_DECREF(v);
         goto again;
     }
-    size = DK_SIZE(mp->ma_keys);
+    ep = DK_ENTRIES(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
     }
     else {
-        value_ptr = &mp->ma_keys->dk_entries[0].me_value;
+        value_ptr = &ep[0].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
     for (i = 0, j = 0; i < size; i++) {
@@ -1920,8 +2293,8 @@ dict_items(PyDictObject *mp)
         goto again;
     }
     /* Nothing we do below makes any function calls. */
-    ep = mp->ma_keys->dk_entries;
-    size = DK_SIZE(mp->ma_keys);
+    ep = DK_ENTRIES(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
@@ -1965,7 +2338,8 @@ dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
 }
 
 static int
-dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
+dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
+                   const char *methname)
 {
     PyObject *arg = NULL;
     int result = 0;
@@ -2067,6 +2441,7 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
     }
 
     i = 0;
+    assert(_PyDict_CheckConsistency((PyDictObject *)d));
     goto Return;
 Fail:
     Py_XDECREF(item);
@@ -2077,18 +2452,14 @@ Return:
     return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
 }
 
-int
-PyDict_Update(PyObject *a, PyObject *b)
-{
-    return PyDict_Merge(a, b, 1);
-}
-
-int
-PyDict_Merge(PyObject *a, PyObject *b, int override)
+static int
+dict_merge(PyObject *a, PyObject *b, int override)
 {
     PyDictObject *mp, *other;
     Py_ssize_t i, n;
-    PyDictKeyEntry *entry;
+    PyDictKeyEntry *entry, *ep0;
+
+    assert(0 <= override && override <= 2);
 
     /* We accept for the argument either a concrete dictionary object,
      * or an abstract "mapping" object.  For the former, we can do
@@ -2115,13 +2486,16 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
          * incrementally resizing as we insert new items.  Expect
          * that there will be no (or few) overlapping keys.
          */
-        if (mp->ma_keys->dk_usable * 3 < other->ma_used * 2)
-            if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
+        if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
+            if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
                return -1;
-        for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) {
+            }
+        }
+        ep0 = DK_ENTRIES(other->ma_keys);
+        for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
             PyObject *key, *value;
             Py_hash_t hash;
-            entry = &other->ma_keys->dk_entries[i];
+            entry = &ep0[i];
             key = entry->me_key;
             hash = entry->me_hash;
             if (other->ma_values)
@@ -2133,14 +2507,28 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
                 int err = 0;
                 Py_INCREF(key);
                 Py_INCREF(value);
-                if (override || PyDict_GetItem(a, key) == NULL)
+                if (override == 1)
                     err = insertdict(mp, key, hash, value);
+                else if (_PyDict_GetItem_KnownHash(a, key, hash) == NULL) {
+                    if (PyErr_Occurred()) {
+                        Py_DECREF(value);
+                        Py_DECREF(key);
+                        return -1;
+                    }
+                    err = insertdict(mp, key, hash, value);
+                }
+                else if (override != 0) {
+                    _PyErr_SetKeyError(key);
+                    Py_DECREF(value);
+                    Py_DECREF(key);
+                    return -1;
+                }
                 Py_DECREF(value);
                 Py_DECREF(key);
                 if (err != 0)
                     return -1;
 
-                if (n != DK_SIZE(other->ma_keys)) {
+                if (n != other->ma_keys->dk_nentries) {
                     PyErr_SetString(PyExc_RuntimeError,
                                     "dict mutated during update");
                     return -1;
@@ -2169,7 +2557,13 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
             return -1;
 
         for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
-            if (!override && PyDict_GetItem(a, key) != NULL) {
+            if (override != 1 && PyDict_GetItem(a, key) != NULL) {
+                if (override != 0) {
+                    _PyErr_SetKeyError(key);
+                    Py_DECREF(key);
+                    Py_DECREF(iter);
+                    return -1;
+                }
                 Py_DECREF(key);
                 continue;
             }
@@ -2192,9 +2586,29 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
             /* Iterator completed, via error */
             return -1;
     }
+    assert(_PyDict_CheckConsistency((PyDictObject *)a));
     return 0;
 }
 
+int
+PyDict_Update(PyObject *a, PyObject *b)
+{
+    return dict_merge(a, b, 1);
+}
+
+int
+PyDict_Merge(PyObject *a, PyObject *b, int override)
+{
+    /* XXX Deprecate override not in (0, 1). */
+    return dict_merge(a, b, override != 0);
+}
+
+int
+_PyDict_MergeEx(PyObject *a, PyObject *b, int override)
+{
+    return dict_merge(a, b, override);
+}
+
 static PyObject *
 dict_copy(PyDictObject *mp)
 {
@@ -2215,7 +2629,9 @@ PyDict_Copy(PyObject *o)
     mp = (PyDictObject *)o;
     if (_PyDict_HasSplitTable(mp)) {
         PyDictObject *split_copy;
-        PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys));
+        Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
+        PyObject **newvalues;
+        newvalues = new_values(size);
         if (newvalues == NULL)
             return PyErr_NoMemory();
         split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
@@ -2227,7 +2643,7 @@ PyDict_Copy(PyObject *o)
         split_copy->ma_keys = mp->ma_keys;
         split_copy->ma_used = mp->ma_used;
         DK_INCREF(mp->ma_keys);
-        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+        for (i = 0, n = size; i < n; i++) {
             PyObject *value = mp->ma_values[i];
             Py_XINCREF(value);
             split_copy->ma_values[i] = value;
@@ -2298,8 +2714,8 @@ dict_equal(PyDictObject *a, PyDictObject *b)
         /* can't be equal if # of entries differ */
         return 0;
     /* Same # of entries -- check all of 'em.  Exit early on any diff. */
-    for (i = 0; i < DK_SIZE(a->ma_keys); i++) {
-        PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i];
+    for (i = 0; i < a->ma_keys->dk_nentries; i++) {
+        PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
         PyObject *aval;
         if (a->ma_values)
             aval = a->ma_values[i];
@@ -2316,7 +2732,7 @@ dict_equal(PyDictObject *a, PyDictObject *b)
             /* ditto for key */
             Py_INCREF(key);
             /* reuse the known hash value */
-            if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL)
+            if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr, NULL) < 0)
                 bval = NULL;
             else
                 bval = *vaddr;
@@ -2374,7 +2790,7 @@ dict___contains__(PyDictObject *self, PyObject *key)
 {
     register PyDictObject *mp = self;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
@@ -2383,10 +2799,12 @@ dict___contains__(PyDictObject *self, PyObject *key)
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    return PyBool_FromLong(*value_addr != NULL);
+    if (ix == DKIX_EMPTY || *value_addr == NULL)
+        Py_RETURN_FALSE;
+    Py_RETURN_TRUE;
 }
 
 static PyObject *
@@ -2396,7 +2814,7 @@ dict_get(PyDictObject *mp, PyObject *args)
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
     PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
@@ -2408,12 +2826,13 @@ dict_get(PyDictObject *mp, PyObject *args)
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    val = *value_addr;
-    if (val == NULL)
+    if (ix == DKIX_EMPTY || *value_addr == NULL)
         val = failobj;
+    else
+        val = *value_addr;
     Py_INCREF(val);
     return val;
 }
@@ -2422,43 +2841,88 @@ PyObject *
 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
 {
     PyDictObject *mp = (PyDictObject *)d;
-    PyObject *val = NULL;
+    PyObject *value;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
+    Py_ssize_t hashpos, ix;
     PyObject **value_addr;
 
     if (!PyDict_Check(d)) {
         PyErr_BadInternalCall();
         return NULL;
     }
+
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+
+    if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
+        if (insertion_resize(mp) < 0)
+            return NULL;
+    }
+
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return NULL;
-    val = *value_addr;
-    if (val == NULL) {
+
+    if (_PyDict_HasSplitTable(mp) &&
+        ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+         (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+        if (insertion_resize(mp) < 0) {
+            return NULL;
+        }
+        find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        ix = DKIX_EMPTY;
+    }
+
+    if (ix == DKIX_EMPTY) {
+        PyDictKeyEntry *ep, *ep0;
+        value = defaultobj;
         if (mp->ma_keys->dk_usable <= 0) {
-            /* Need to resize. */
-            if (insertion_resize(mp) < 0)
+            if (insertion_resize(mp) < 0) {
                 return NULL;
-            ep = find_empty_slot(mp, key, hash, &value_addr);
+            }
+            find_empty_slot(mp, key, hash, &value_addr, &hashpos);
         }
-        Py_INCREF(defaultobj);
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        ep = &ep0[mp->ma_keys->dk_nentries];
+        dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
         Py_INCREF(key);
-        MAINTAIN_TRACKING(mp, key, defaultobj);
+        Py_INCREF(value);
+        MAINTAIN_TRACKING(mp, key, value);
         ep->me_key = key;
         ep->me_hash = hash;
-        *value_addr = defaultobj;
-        val = defaultobj;
+        if (mp->ma_values) {
+            assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+            mp->ma_values[mp->ma_keys->dk_nentries] = value;
+        }
+        else {
+            ep->me_value = value;
+        }
+        mp->ma_used++;
+        mp->ma_version_tag = DICT_NEXT_VERSION();
         mp->ma_keys->dk_usable--;
+        mp->ma_keys->dk_nentries++;
+        assert(mp->ma_keys->dk_usable >= 0);
+    }
+    else if (*value_addr == NULL) {
+        value = defaultobj;
+        assert(_PyDict_HasSplitTable(mp));
+        assert(ix == mp->ma_used);
+        Py_INCREF(value);
+        MAINTAIN_TRACKING(mp, key, value);
+        *value_addr = value;
         mp->ma_used++;
+        mp->ma_version_tag = DICT_NEXT_VERSION();
     }
-    return val;
+    else {
+        value = *value_addr;
+    }
+
+    assert(_PyDict_CheckConsistency(mp));
+    return value;
 }
 
 static PyObject *
@@ -2490,17 +2954,16 @@ dict_pop(PyDictObject *mp, PyObject *args)
     if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
         return NULL;
 
-    return _PyDict_Pop(mp, key, deflt);
+    return _PyDict_Pop((PyObject*)mp, key, deflt);
 }
 
 static PyObject *
 dict_popitem(PyDictObject *mp)
 {
-    Py_hash_t i = 0;
-    PyDictKeyEntry *ep;
+    Py_ssize_t i, j;
+    PyDictKeyEntry *ep0, *ep;
     PyObject *res;
 
-
     /* Allocate the result tuple before checking the size.  Believe it
      * or not, this allocation could trigger a garbage collection which
      * could empty the dict, so if we checked the size first and that
@@ -2521,68 +2984,64 @@ dict_popitem(PyDictObject *mp)
     }
     /* Convert split table to combined table */
     if (mp->ma_keys->dk_lookup == lookdict_split) {
-        /* -1 is required since dictresize() uses key size > minused */
-        if (dictresize(mp, DK_SIZE(mp->ma_keys) - 1)) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
             Py_DECREF(res);
             return NULL;
         }
     }
     ENSURE_ALLOWS_DELETIONS(mp);
-    /* Set ep to "the first" dict entry with a value.  We abuse the hash
-     * field of slot 0 to hold a search finger:
-     * If slot 0 has a value, use slot 0.
-     * Else slot 0 is being used to hold a search finger,
-     * and we use its hash value as the first index to look.
-     */
-    ep = &mp->ma_keys->dk_entries[0];
-    if (ep->me_value == NULL) {
-        Py_ssize_t mask = DK_MASK(mp->ma_keys);
-        i = ep->me_hash;
-        /* The hash field may be a real hash value, or it may be a
-         * legit search finger, or it may be a once-legit search
-         * finger that's out of bounds now because it wrapped around
-         * or the table shrunk -- simply make sure it's in bounds now.
-         */
-        if (i > mask || i < 1)
-            i = 1;              /* skip slot 0 */
-        while ((ep = &mp->ma_keys->dk_entries[i])->me_value == NULL) {
-            i++;
-            if (i > mask)
-                i = 1;
-        }
+
+    /* Pop last item */
+    ep0 = DK_ENTRIES(mp->ma_keys);
+    i = mp->ma_keys->dk_nentries - 1;
+    while (i >= 0 && ep0[i].me_value == NULL) {
+        i--;
     }
+    assert(i >= 0);
+
+    ep = &ep0[i];
+    j = lookdict_index(mp->ma_keys, ep->me_hash, i);
+    assert(j >= 0);
+    assert(dk_get_index(mp->ma_keys, j) == i);
+    dk_set_index(mp->ma_keys, j, DKIX_DUMMY);
+
     PyTuple_SET_ITEM(res, 0, ep->me_key);
     PyTuple_SET_ITEM(res, 1, ep->me_value);
-    Py_INCREF(dummy);
-    ep->me_key = dummy;
+    ep->me_key = NULL;
     ep->me_value = NULL;
+    /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
+    mp->ma_keys->dk_nentries = i;
     mp->ma_used--;
-    assert(mp->ma_keys->dk_entries[0].me_value == NULL);
-    mp->ma_keys->dk_entries[0].me_hash = i + 1;  /* next place to start */
+    mp->ma_version_tag = DICT_NEXT_VERSION();
+    assert(_PyDict_CheckConsistency(mp));
     return res;
 }
 
 static int
 dict_traverse(PyObject *op, visitproc visit, void *arg)
 {
-    Py_ssize_t i, n;
     PyDictObject *mp = (PyDictObject *)op;
-    if (mp->ma_keys->dk_lookup == lookdict) {
-        for (i = 0; i < DK_SIZE(mp->ma_keys); i++) {
-            if (mp->ma_keys->dk_entries[i].me_value != NULL) {
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_key);
+    PyDictKeysObject *keys = mp->ma_keys;
+    PyDictKeyEntry *entries = DK_ENTRIES(keys);
+    Py_ssize_t i, n = keys->dk_nentries;
+
+    if (keys->dk_lookup == lookdict) {
+        for (i = 0; i < n; i++) {
+            if (entries[i].me_value != NULL) {
+                Py_VISIT(entries[i].me_value);
+                Py_VISIT(entries[i].me_key);
             }
         }
-    } else {
+    }
+    else {
         if (mp->ma_values != NULL) {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            for (i = 0; i < n; i++) {
                 Py_VISIT(mp->ma_values[i]);
             }
         }
         else {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
+            for (i = 0; i < n; i++) {
+                Py_VISIT(entries[i].me_value);
             }
         }
     }
@@ -2601,23 +3060,31 @@ static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
 Py_ssize_t
 _PyDict_SizeOf(PyDictObject *mp)
 {
-    Py_ssize_t size, res;
+    Py_ssize_t size, usable, res;
 
     size = DK_SIZE(mp->ma_keys);
+    usable = USABLE_FRACTION(size);
+
     res = _PyObject_SIZE(Py_TYPE(mp));
     if (mp->ma_values)
-        res += size * sizeof(PyObject*);
+        res += usable * sizeof(PyObject*);
     /* If the dictionary is split, the keys portion is accounted-for
        in the type object. */
     if (mp->ma_keys->dk_refcnt == 1)
-        res += sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry);
+        res += (sizeof(PyDictKeysObject)
+                - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+                + DK_IXSIZE(mp->ma_keys) * size
+                + sizeof(PyDictKeyEntry) * usable);
     return res;
 }
 
 Py_ssize_t
 _PyDict_KeysSize(PyDictKeysObject *keys)
 {
-    return sizeof(PyDictKeysObject) + (DK_SIZE(keys)-1) * sizeof(PyDictKeyEntry);
+    return (sizeof(PyDictKeysObject)
+            - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+            + DK_IXSIZE(keys) * DK_SIZE(keys)
+            + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
 }
 
 static PyObject *
@@ -2704,8 +3171,8 @@ int
 PyDict_Contains(PyObject *op, PyObject *key)
 {
     Py_hash_t hash;
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
@@ -2714,8 +3181,10 @@ PyDict_Contains(PyObject *op, PyObject *key)
         if (hash == -1)
             return -1;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    return (ep == NULL) ? -1 : (*value_addr != NULL);
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
+        return -1;
+    return (ix != DKIX_EMPTY && *value_addr != NULL);
 }
 
 /* Internal version of PyDict_Contains used when the hash value is already known */
@@ -2723,11 +3192,13 @@ int
 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
 {
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
+    Py_ssize_t ix;
 
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    return (ep == NULL) ? -1 : (*value_addr != NULL);
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
+        return -1;
+    return (ix != DKIX_EMPTY && *value_addr != NULL);
 }
 
 /* Hack to implement "key in dict" */
@@ -2761,11 +3232,13 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         _PyObject_GC_UNTRACK(d);
 
     d->ma_used = 0;
-    d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    d->ma_version_tag = DICT_NEXT_VERSION();
+    d->ma_keys = new_keys_object(PyDict_MINSIZE);
     if (d->ma_keys == NULL) {
         Py_DECREF(self);
         return NULL;
     }
+    assert(_PyDict_CheckConsistency(d));
     return self;
 }
 
@@ -2986,13 +3459,13 @@ static PyMethodDef dictiter_methods[] = {
     {NULL,              NULL}           /* sentinel */
 };
 
-static PyObject *dictiter_iternextkey(dictiterobject *di)
+static PyObject*
+dictiter_iternextkey(dictiterobject *di)
 {
     PyObject *key;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n;
     PyDictKeysObject *k;
     PyDictObject *d = di->di_dict;
-    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -3006,27 +3479,30 @@ static PyObject *dictiter_iternextkey(dictiterobject *di)
     }
 
     i = di->di_pos;
-    if (i < 0)
-        goto fail;
     k = d->ma_keys;
+    n = k->dk_nentries;
     if (d->ma_values) {
-        value_ptr = &d->ma_values[i];
-        offset = sizeof(PyObject *);
+        PyObject **value_ptr = &d->ma_values[i];
+        while (i < n && *value_ptr == NULL) {
+            value_ptr++;
+            i++;
+        }
+        if (i >= n)
+            goto fail;
+        key = DK_ENTRIES(k)[i].me_key;
     }
     else {
-        value_ptr = &k->dk_entries[i].me_value;
-        offset = sizeof(PyDictKeyEntry);
-    }
-    mask = DK_SIZE(k)-1;
-    while (i <= mask && *value_ptr == NULL) {
-        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
-        i++;
+        PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
+        while (i < n && entry_ptr->me_value == NULL) {
+            entry_ptr++;
+            i++;
+        }
+        if (i >= n)
+            goto fail;
+        key = entry_ptr->me_key;
     }
     di->di_pos = i+1;
-    if (i > mask)
-        goto fail;
     di->len--;
-    key = k->dk_entries[i].me_key;
     Py_INCREF(key);
     return key;
 
@@ -3069,12 +3545,12 @@ PyTypeObject PyDictIterKey_Type = {
     0,
 };
 
-static PyObject *dictiter_iternextvalue(dictiterobject *di)
+static PyObject *
+dictiter_iternextvalue(dictiterobject *di)
 {
     PyObject *value;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n;
     PyDictObject *d = di->di_dict;
-    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -3088,26 +3564,29 @@ static PyObject *dictiter_iternextvalue(dictiterobject *di)
     }
 
     i = di->di_pos;
-    mask = DK_SIZE(d->ma_keys)-1;
-    if (i < 0 || i > mask)
-        goto fail;
+    n = d->ma_keys->dk_nentries;
     if (d->ma_values) {
-        value_ptr = &d->ma_values[i];
-        offset = sizeof(PyObject *);
+        PyObject **value_ptr = &d->ma_values[i];
+        while (i < n && *value_ptr == NULL) {
+            value_ptr++;
+            i++;
+        }
+        if (i >= n)
+            goto fail;
+        value = *value_ptr;
     }
     else {
-        value_ptr = &d->ma_keys->dk_entries[i].me_value;
-        offset = sizeof(PyDictKeyEntry);
-    }
-    while (i <= mask && *value_ptr == NULL) {
-        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
-        i++;
-        if (i > mask)
+        PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
+        while (i < n && entry_ptr->me_value == NULL) {
+            entry_ptr++;
+            i++;
+        }
+        if (i >= n)
             goto fail;
+        value = entry_ptr->me_value;
     }
     di->di_pos = i+1;
     di->len--;
-    value = *value_ptr;
     Py_INCREF(value);
     return value;
 
@@ -3138,7 +3617,7 @@ PyTypeObject PyDictIterValue_Type = {
     PyObject_GenericGetAttr,                    /* tp_getattro */
     0,                                          /* tp_setattro */
     0,                                          /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
     0,                                          /* tp_doc */
     (traverseproc)dictiter_traverse,            /* tp_traverse */
     0,                                          /* tp_clear */
@@ -3150,12 +3629,12 @@ PyTypeObject PyDictIterValue_Type = {
     0,
 };
 
-static PyObject *dictiter_iternextitem(dictiterobject *di)
+static PyObject *
+dictiter_iternextitem(dictiterobject *di)
 {
     PyObject *key, *value, *result = di->di_result;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n;
     PyDictObject *d = di->di_dict;
-    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -3169,37 +3648,41 @@ static PyObject *dictiter_iternextitem(dictiterobject *di)
     }
 
     i = di->di_pos;
-    if (i < 0)
-        goto fail;
-    mask = DK_SIZE(d->ma_keys)-1;
+    n = d->ma_keys->dk_nentries;
     if (d->ma_values) {
-        value_ptr = &d->ma_values[i];
-        offset = sizeof(PyObject *);
+        PyObject **value_ptr = &d->ma_values[i];
+        while (i < n && *value_ptr == NULL) {
+            value_ptr++;
+            i++;
+        }
+        if (i >= n)
+            goto fail;
+        key = DK_ENTRIES(d->ma_keys)[i].me_key;
+        value = *value_ptr;
     }
     else {
-        value_ptr = &d->ma_keys->dk_entries[i].me_value;
-        offset = sizeof(PyDictKeyEntry);
-    }
-    while (i <= mask && *value_ptr == NULL) {
-        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
-        i++;
+        PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
+        while (i < n && entry_ptr->me_value == NULL) {
+            entry_ptr++;
+            i++;
+        }
+        if (i >= n)
+            goto fail;
+        key = entry_ptr->me_key;
+        value = entry_ptr->me_value;
     }
     di->di_pos = i+1;
-    if (i > mask)
-        goto fail;
-
+    di->len--;
     if (result->ob_refcnt == 1) {
         Py_INCREF(result);
         Py_DECREF(PyTuple_GET_ITEM(result, 0));
         Py_DECREF(PyTuple_GET_ITEM(result, 1));
-    } else {
+    }
+    else {
         result = PyTuple_New(2);
         if (result == NULL)
             return NULL;
     }
-    di->len--;
-    key = d->ma_keys->dk_entries[i].me_key;
-    value = *value_ptr;
     Py_INCREF(key);
     Py_INCREF(value);
     PyTuple_SET_ITEM(result, 0, key);  /* steals reference */
@@ -3838,7 +4321,7 @@ dictvalues_new(PyObject *dict)
 PyDictKeysObject *
 _PyDict_NewKeysForClass(void)
 {
-    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_SPLIT);
+    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
     if (keys == NULL)
         PyErr_Clear();
     else
@@ -3874,7 +4357,7 @@ PyObject_GenericGetDict(PyObject *obj, void *context)
 
 int
 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
-                     PyObject *key, PyObject *value)
+                      PyObject *key, PyObject *value)
 {
     PyObject *dict;
     int res;
@@ -3901,15 +4384,25 @@ _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
         else {
             int was_shared = cached == ((PyDictObject *)dict)->ma_keys;
             res = PyDict_SetItem(dict, key, value);
-            /* PyDict_SetItem() may call dictresize() and convert split table
-             * into combined table.  In such case, convert it to split
-             * table again and update type's shared key only when this is
-             * the only dict sharing key with the type.
-             */
             if (was_shared && cached != ((PyDictObject *)dict)->ma_keys) {
+                /* PyDict_SetItem() may call dictresize and convert split table
+                 * into combined table.  In such case, convert it to split
+                 * table again and update type's shared key only when this is
+                 * the only dict sharing key with the type.
+                 *
+                 * This is to allow using shared key in class like this:
+                 *
+                 *     class C:
+                 *         def __init__(self):
+                 *             # one dict resize happens
+                 *             self.a, self.b, self.c = 1, 2, 3
+                 *             self.d, self.e, self.f = 4, 5, 6
+                 *     a = C()
+                 */
                 if (cached->dk_refcnt == 1) {
                     CACHED_KEYS(tp) = make_keys_shared(dict);
-                } else {
+                }
+                else {
                     CACHED_KEYS(tp) = NULL;
                 }
                 DK_DECREF(cached);
@@ -3939,50 +4432,3 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
 {
     DK_DECREF(keys);
 }
-
-
-/* ARGSUSED */
-static PyObject *
-dummy_repr(PyObject *op)
-{
-    return PyUnicode_FromString("<dummy key>");
-}
-
-/* ARGUSED */
-static void
-dummy_dealloc(PyObject* ignore)
-{
-    /* This should never get called, but we also don't want to SEGV if
-     * we accidentally decref dummy-key out of existence.
-     */
-    Py_FatalError("deallocating <dummy key>");
-}
-
-static PyTypeObject PyDictDummy_Type = {
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "<dummy key> type",
-    0,
-    0,
-    dummy_dealloc,      /*tp_dealloc*/ /*never called*/
-    0,                  /*tp_print*/
-    0,                  /*tp_getattr*/
-    0,                  /*tp_setattr*/
-    0,                  /*tp_reserved*/
-    dummy_repr,         /*tp_repr*/
-    0,                  /*tp_as_number*/
-    0,                  /*tp_as_sequence*/
-    0,                  /*tp_as_mapping*/
-    0,                  /*tp_hash */
-    0,                  /*tp_call */
-    0,                  /*tp_str */
-    0,                  /*tp_getattro */
-    0,                  /*tp_setattro */
-    0,                  /*tp_as_buffer */
-    Py_TPFLAGS_DEFAULT, /*tp_flags */
-};
-
-static PyObject _dummy_struct = {
-  _PyObject_EXTRA_INIT
-  2, &PyDictDummy_Type
-};
-