summaryrefslogtreecommitdiff
path: root/Objects/dictobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/dictobject.c')
-rw-r--r--Objects/dictobject.c200
1 files changed, 96 insertions, 104 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index d8ab91fb12..320dff6dfc 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -10,8 +10,9 @@
This implements the dictionary's hashtable.
-As of Python 3.6, this is compact and ordered. Basic idea is described here.
-https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
+As of Python 3.6, this is compact and ordered. Basic idea is described here:
+* https://mail.python.org/pipermail/python-dev/2012-December/123028.html
+* https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
layout:
@@ -682,7 +683,7 @@ the <dummy> value.
For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns
where the key index should be inserted.
*/
-static Py_ssize_t
+static Py_ssize_t _Py_HOT_FUNCTION
lookdict(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
@@ -797,7 +798,7 @@ top:
}
/* Specialized version for string-only keys */
-static Py_ssize_t
+static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
@@ -872,7 +873,7 @@ lookdict_unicode(PyDictObject *mp, PyObject *key,
/* Faster version of lookdict_unicode when it is known that no <dummy> keys
* will be present. */
-static Py_ssize_t
+static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr,
Py_ssize_t *hashpos)
@@ -940,7 +941,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
* Split tables only contain unicode keys and no dummy keys,
* so algorithm is the same as lookdict_unicode_nodummy.
*/
-static Py_ssize_t
+static Py_ssize_t _Py_HOT_FUNCTION
lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{
@@ -1197,41 +1198,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
}
/*
-Internal routine used by dictresize() to insert an item which is
-known to be absent from the dict. This routine also assumes that
-the dict contains no deleted entries. Besides the performance benefit,
-using insertdict() in dictresize() is dangerous (SF bug #1456209).
-Note that no refcounts are changed by this routine; if needed, the caller
-is responsible for incref'ing `key` and `value`.
-Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
-must set them correctly
+Internal routine used by dictresize() to buid a hashtable of entries.
*/
static void
-insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
- PyObject *value)
+build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
{
- size_t i;
- PyDictKeysObject *k = mp->ma_keys;
- size_t mask = (size_t)DK_SIZE(k)-1;
- PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
- PyDictKeyEntry *ep;
-
- assert(k->dk_lookup != NULL);
- assert(value != NULL);
- assert(key != NULL);
- assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
- i = hash & mask;
- for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
- perturb >>= PERTURB_SHIFT;
- i = mask & ((i << 2) + i + perturb + 1);
+ size_t mask = (size_t)DK_SIZE(keys) - 1;
+ for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
+ Py_hash_t hash = ep->me_hash;
+ size_t i = hash & mask;
+ for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
+ perturb >>= PERTURB_SHIFT;
+ i = mask & ((i << 2) + i + perturb + 1);
+ }
+ dk_set_index(keys, i, ix);
}
- ep = &ep0[k->dk_nentries];
- assert(ep->me_value == NULL);
- dk_set_index(k, i, k->dk_nentries);
- k->dk_nentries++;
- ep->me_key = key;
- ep->me_hash = hash;
- ep->me_value = value;
}
/*
@@ -1247,10 +1228,10 @@ but can be resplit by make_keys_shared().
static int
dictresize(PyDictObject *mp, Py_ssize_t minused)
{
- Py_ssize_t i, newsize;
+ Py_ssize_t newsize, numentries;
PyDictKeysObject *oldkeys;
PyObject **oldvalues;
- PyDictKeyEntry *ep0;
+ PyDictKeyEntry *oldentries, *newentries;
/* Find the smallest table size > minused. */
for (newsize = PyDict_MINSIZE;
@@ -1261,8 +1242,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
PyErr_NoMemory();
return -1;
}
+
oldkeys = mp->ma_keys;
- oldvalues = mp->ma_values;
+
+ /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
+ * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
+ * TODO: Try reusing oldkeys when reimplement odict.
+ */
+
/* Allocate a new table. */
mp->ma_keys = new_keys_object(newsize);
if (mp->ma_keys == NULL) {
@@ -1271,42 +1258,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
}
if (oldkeys->dk_lookup == lookdict)
mp->ma_keys->dk_lookup = lookdict;
- mp->ma_values = NULL;
- ep0 = DK_ENTRIES(oldkeys);
- /* Main loop below assumes we can transfer refcount to new keys
- * and that value is stored in me_value.
- * Increment ref-counts and copy values here to compensate
- * This (resizing a split table) should be relatively rare */
+
+ numentries = mp->ma_used;
+ oldentries = DK_ENTRIES(oldkeys);
+ newentries = DK_ENTRIES(mp->ma_keys);
+ oldvalues = mp->ma_values;
if (oldvalues != NULL) {
- for (i = 0; i < oldkeys->dk_nentries; i++) {
- if (oldvalues[i] != NULL) {
- Py_INCREF(ep0[i].me_key);
- ep0[i].me_value = oldvalues[i];
- }
- }
- }
- /* Main loop */
- for (i = 0; i < oldkeys->dk_nentries; i++) {
- PyDictKeyEntry *ep = &ep0[i];
- if (ep->me_value != NULL) {
- insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
+ /* Convert split table into new combined table.
+ * We must incref keys; we can transfer values.
+ * Note that values of split table is always dense.
+ */
+ for (Py_ssize_t i = 0; i < numentries; i++) {
+ assert(oldvalues[i] != NULL);
+ PyDictKeyEntry *ep = &oldentries[i];
+ PyObject *key = ep->me_key;
+ Py_INCREF(key);
+ newentries[i].me_key = key;
+ newentries[i].me_hash = ep->me_hash;
+ newentries[i].me_value = oldvalues[i];
}
- }
- mp->ma_keys->dk_usable -= mp->ma_used;
- if (oldvalues != NULL) {
- /* NULL out me_value slot in oldkeys, in case it was shared */
- for (i = 0; i < oldkeys->dk_nentries; i++)
- ep0[i].me_value = NULL;
+
DK_DECREF(oldkeys);
+ mp->ma_values = NULL;
if (oldvalues != empty_values) {
free_values(oldvalues);
}
}
- else {
+ else { // combined table.
+ if (oldkeys->dk_nentries == numentries) {
+ memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
+ }
+ else {
+ PyDictKeyEntry *ep = oldentries;
+ for (Py_ssize_t i = 0; i < numentries; i++) {
+ while (ep->me_value == NULL)
+ ep++;
+ newentries[i] = *ep++;
+ }
+ }
+
assert(oldkeys->dk_lookup != lookdict_split);
assert(oldkeys->dk_refcnt == 1);
- DK_DEBUG_DECREF PyObject_FREE(oldkeys);
+ if (oldkeys->dk_size == PyDict_MINSIZE &&
+ numfreekeys < PyDict_MAXFREELIST) {
+ DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
+ }
+ else {
+ DK_DEBUG_DECREF PyObject_FREE(oldkeys);
+ }
}
+
+ build_indices(mp->ma_keys, newentries, numentries);
+ mp->ma_keys->dk_usable -= numentries;
+ mp->ma_keys->dk_nentries = numentries;
return 0;
}
@@ -1405,7 +1409,7 @@ PyDict_GetItem(PyObject *op, PyObject *key)
Let's just hope that no exception occurs then... This must be
_PyThreadState_Current and not PyThreadState_GET() because in debug
mode, the latter complains if tstate is NULL. */
- tstate = _PyThreadState_UncheckedGet();
+ tstate = PyThreadState_GET();
if (tstate != NULL && tstate->curexc_type != NULL) {
/* preserve the existing exception */
PyObject *err_type, *err_value, *err_tb;
@@ -1687,7 +1691,7 @@ int
_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
PyObject **pvalue, Py_hash_t *phash)
{
- Py_ssize_t i, n;
+ Py_ssize_t i;
PyDictObject *mp;
PyDictKeyEntry *entry_ptr;
PyObject *value;
@@ -1696,21 +1700,18 @@ _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
return 0;
mp = (PyDictObject *)op;
i = *ppos;
- n = mp->ma_keys->dk_nentries;
- if ((size_t)i >= (size_t)n)
- return 0;
if (mp->ma_values) {
- PyObject **value_ptr = &mp->ma_values[i];
- while (i < n && *value_ptr == NULL) {
- value_ptr++;
- i++;
- }
- if (i >= n)
+ if (i < 0 || i >= mp->ma_used)
return 0;
+ /* values of split table is always dense */
entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
- value = *value_ptr;
+ value = mp->ma_values[i];
+ assert(value != NULL);
}
else {
+ Py_ssize_t n = mp->ma_keys->dk_nentries;
+ if (i < 0 || i >= n)
+ return 0;
entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
while (i < n && entry_ptr->me_value == NULL) {
entry_ptr++;
@@ -3375,7 +3376,7 @@ static PyObject*
dictiter_iternextkey(dictiterobject *di)
{
PyObject *key;
- Py_ssize_t i, n;
+ Py_ssize_t i;
PyDictKeysObject *k;
PyDictObject *d = di->di_dict;
@@ -3392,18 +3393,15 @@ dictiter_iternextkey(dictiterobject *di)
i = di->di_pos;
k = d->ma_keys;
- n = k->dk_nentries;
+ assert(i >= 0);
if (d->ma_values) {
- PyObject **value_ptr = &d->ma_values[i];
- while (i < n && *value_ptr == NULL) {
- value_ptr++;
- i++;
- }
- if (i >= n)
+ if (i >= d->ma_used)
goto fail;
key = DK_ENTRIES(k)[i].me_key;
+ assert(d->ma_values[i] != NULL);
}
else {
+ Py_ssize_t n = k->dk_nentries;
PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
while (i < n && entry_ptr->me_value == NULL) {
entry_ptr++;
@@ -3461,7 +3459,7 @@ static PyObject *
dictiter_iternextvalue(dictiterobject *di)
{
PyObject *value;
- Py_ssize_t i, n;
+ Py_ssize_t i;
PyDictObject *d = di->di_dict;
if (d == NULL)
@@ -3476,18 +3474,15 @@ dictiter_iternextvalue(dictiterobject *di)
}
i = di->di_pos;
- n = d->ma_keys->dk_nentries;
+ assert(i >= 0);
if (d->ma_values) {
- PyObject **value_ptr = &d->ma_values[i];
- while (i < n && *value_ptr == NULL) {
- value_ptr++;
- i++;
- }
- if (i >= n)
+ if (i >= d->ma_used)
goto fail;
- value = *value_ptr;
+ value = d->ma_values[i];
+ assert(value != NULL);
}
else {
+ Py_ssize_t n = d->ma_keys->dk_nentries;
PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
while (i < n && entry_ptr->me_value == NULL) {
entry_ptr++;
@@ -3545,7 +3540,7 @@ static PyObject *
dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
- Py_ssize_t i, n;
+ Py_ssize_t i;
PyDictObject *d = di->di_dict;
if (d == NULL)
@@ -3560,19 +3555,16 @@ dictiter_iternextitem(dictiterobject *di)
}
i = di->di_pos;
- n = d->ma_keys->dk_nentries;
+ assert(i >= 0);
if (d->ma_values) {
- PyObject **value_ptr = &d->ma_values[i];
- while (i < n && *value_ptr == NULL) {
- value_ptr++;
- i++;
- }
- if (i >= n)
+ if (i >= d->ma_used)
goto fail;
key = DK_ENTRIES(d->ma_keys)[i].me_key;
- value = *value_ptr;
+ value = d->ma_values[i];
+ assert(value != NULL);
}
else {
+ Py_ssize_t n = d->ma_keys->dk_nentries;
PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
while (i < n && entry_ptr->me_value == NULL) {
entry_ptr++;