From 7a1aba85e771486e07e4d1d72bbd8d17ce53e0f1 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 7 Sep 2016 17:40:12 -0700 Subject: Implement compact dict Issue #27350: `dict` implementation is changed like PyPy. It is more compact and preserves insertion order. _PyDict_Dummy() function has been removed. Disable test_gdb: python-gdb.py is not updated yet to the new structure of compact dictionaries (issue #28023). Patch written by INADA Naoki. --- Objects/dict-common.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'Objects/dict-common.h') diff --git a/Objects/dict-common.h b/Objects/dict-common.h index 2912eb94ea..5f9afdb1f9 100644 --- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -8,15 +8,25 @@ typedef struct { PyObject *me_value; /* This field is only meaningful for combined tables */ } PyDictKeyEntry; -typedef PyDictKeyEntry *(*dict_lookup_func) -(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr); +/* dict_lookup_func() returns index of entry which can be used like DK_ENTRIES(dk)[index]. + * -1 when no entry found, -3 when compare raises error. + */ +typedef Py_ssize_t (*dict_lookup_func) +(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr, + Py_ssize_t *hashpos); +#define DKIX_EMPTY (-1) +#define DKIX_DUMMY (-2) /* Used internally */ +#define DKIX_ERROR (-3) + +/* See dictobject.c for actual layout of DictKeysObject */ struct _dictkeysobject { Py_ssize_t dk_refcnt; Py_ssize_t dk_size; dict_lookup_func dk_lookup; Py_ssize_t dk_usable; - PyDictKeyEntry dk_entries[1]; + Py_ssize_t dk_nentries; /* How many entries are used. */ + char dk_indices[8]; /* dynamically sized. 8 is minimum. */ }; #endif -- cgit v1.2.1 From b38c6a5698d3d48a01b32f4abdf8b27f94206338 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 8 Sep 2016 12:01:25 -0700 Subject: Add documentation to the dict implementation Issue #27350. 
--- Objects/dict-common.h | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'Objects/dict-common.h') diff --git a/Objects/dict-common.h b/Objects/dict-common.h index 5f9afdb1f9..d31cafea1e 100644 --- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -22,11 +22,50 @@ typedef Py_ssize_t (*dict_lookup_func) /* See dictobject.c for actual layout of DictKeysObject */ struct _dictkeysobject { Py_ssize_t dk_refcnt; + + /* Size of the hash table (dk_indices). It must be a power of 2. */ Py_ssize_t dk_size; + + /* Function to lookup in the hash table (dk_indices): + + - lookdict(): general-purpose, and may return DKIX_ERROR if (and + only if) a comparison raises an exception. + + - lookdict_unicode(): specialized to Unicode string keys, comparison of + which can never raise an exception; that function can never return + DKIX_ERROR. + + - lookdict_unicode_nodummy(): similar to lookdict_unicode() but further + specialized for Unicode string keys that cannot be the <dummy> value. + + - lookdict_split(): Version of lookdict() for split tables. */ dict_lookup_func dk_lookup; + + /* Number of usable entries in dk_entries. + 0 <= dk_usable <= USABLE_FRACTION(dk_size) */ Py_ssize_t dk_usable; - Py_ssize_t dk_nentries; /* How many entries are used. */ - char dk_indices[8]; /* dynamically sized. 8 is minimum. */ + + /* Number of used entries in dk_entries. + 0 <= dk_nentries < dk_size */ + Py_ssize_t dk_nentries; + + /* Actual hash table of dk_size entries. It holds indices in dk_entries, + or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). + + Indices must be: 0 <= indice < USABLE_FRACTION(dk_size). + + The size in bytes of an indice depends on dk_size: + + - 1 byte if dk_size <= 0xff (char*) + - 2 bytes if dk_size <= 0xffff (int16_t*) + - 4 bytes if dk_size <= 0xffffffff (int32_t*) + - 8 bytes otherwise (Py_ssize_t*) + + Dynamically sized, 8 is minimum. 
*/ + char dk_indices[8]; + + /* "PyDictKeyEntry dk_entries[dk_usable];" array follows: + see the DK_ENTRIES() macro */ }; #endif -- cgit v1.2.1 From bc337fb4df691c1e6d33891f9b0693c687f53d7a Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 8 Sep 2016 12:20:12 -0700 Subject: access dk_indices through a union --- Objects/dict-common.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'Objects/dict-common.h') diff --git a/Objects/dict-common.h b/Objects/dict-common.h index d31cafea1e..865c020c25 100644 --- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -62,7 +62,12 @@ struct _dictkeysobject { - 8 bytes otherwise (Py_ssize_t*) Dynamically sized, 8 is minimum. */ - char dk_indices[8]; + union { + int8_t as_1[8]; + int16_t as_2[4]; + int32_t as_4[2]; + int64_t as_8[1]; + } dk_indices; /* "PyDictKeyEntry dk_entries[dk_usable];" array follows: see the DK_ENTRIES() macro */ -- cgit v1.2.1 From 8ca26ec20ad97063e314ea992929c3d4eec827bd Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 8 Sep 2016 13:16:41 -0700 Subject: do not worry about 64-bit dict sizes on 32-bit platforms --- Objects/dict-common.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'Objects/dict-common.h') diff --git a/Objects/dict-common.h b/Objects/dict-common.h index 865c020c25..c3baf59ef2 100644 --- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -59,14 +59,16 @@ struct _dictkeysobject { - 1 byte if dk_size <= 0xff (char*) - 2 bytes if dk_size <= 0xffff (int16_t*) - 4 bytes if dk_size <= 0xffffffff (int32_t*) - - 8 bytes otherwise (Py_ssize_t*) + - 8 bytes otherwise (int64_t*) Dynamically sized, 8 is minimum. 
*/ union { int8_t as_1[8]; int16_t as_2[4]; int32_t as_4[2]; +#if SIZEOF_VOID_P > 4 int64_t as_8[1]; +#endif } dk_indices; /* "PyDictKeyEntry dk_entries[dk_usable];" array follows: -- cgit v1.2.1 From f59321cd121d5deb6265e1e9c209c5fb04d3f923 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Sep 2016 15:02:01 +0200 Subject: Add _PyDict_CheckConsistency() Issue #28127: Add a function to check that a dictionary remains consistent after any change. By default, tables are not checked, only basic attributes. Define DEBUG_PYDICT (ex: gcc -D DEBUG_PYDICT) to also check dictionary "content". --- Objects/dict-common.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'Objects/dict-common.h') diff --git a/Objects/dict-common.h b/Objects/dict-common.h index c3baf59ef2..ce9edabd89 100644 --- a/Objects/dict-common.h +++ b/Objects/dict-common.h @@ -41,12 +41,10 @@ struct _dictkeysobject { - lookdict_split(): Version of lookdict() for split tables. */ dict_lookup_func dk_lookup; - /* Number of usable entries in dk_entries. - 0 <= dk_usable <= USABLE_FRACTION(dk_size) */ + /* Number of usable entries in dk_entries. */ Py_ssize_t dk_usable; - /* Number of used entries in dk_entries. - 0 <= dk_nentries < dk_size */ + /* Number of used entries in dk_entries. */ Py_ssize_t dk_nentries; /* Actual hash table of dk_size entries. It holds indices in dk_entries, -- cgit v1.2.1