summaryrefslogtreecommitdiff
path: root/src/t_set.c
diff options
context:
space:
mode:
authorViktor Söderqvist <viktor.soderqvist@est.tech>2023-01-20 17:45:29 +0100
committerGitHub <noreply@github.com>2023-01-20 18:45:29 +0200
commitf3f6f7c0d66f136146a912e06c8fbe31ecfbc977 (patch)
tree246b79d2ca0ac0d5420ad6a86512702ae7957b75 /src/t_set.c
parentb4123663c31aaf1e97f4dbd630cbd7b8d0e91e31 (diff)
downloadredis-f3f6f7c0d66f136146a912e06c8fbe31ecfbc977.tar.gz
Key as dict entry - memory optimization for sets (#11595)
If a dict has only keys, and no use of values, then a key can be stored directly in a dict's hashtable. The key replaces the dictEntry. To distinguish between a key and a dictEntry, we only use this optimization if the key is odd, i.e. if the key has the least significant bit set. This is true for sds strings, since the sds header is always an odd number of bytes. Dict entries are used as a fallback when there is a hash collision. A special dict entry without a value (only key and next) is used so we save one word in this case too. This saves 24 bytes per set element for larges sets, and also gains some speed improvement as a side effect (less allocations and cache misses). A quick test adding 1M elements to a set using the command below resulted in memory usage of 28.83M, compared to 46.29M on unstable. That's 18 bytes per set element on average. eval 'for i=1,1000000,1 do redis.call("sadd", "myset", "x"..i) end' 0 Other changes: Allocations are ensured to have at least 8 bits alignment on all systems. This affects 32-bit builds compiled without HAVE_MALLOC_SIZE (not jemalloc or glibc) in which Redis stores the size of each allocation, after this change in 8 bytes instead of previously 4 bytes per allocation. This is done so we can reliably use the 3 least significant bits in a pointer to encode stuff.
Diffstat (limited to 'src/t_set.c')
-rw-r--r--src/t_set.c17
1 files changed, 8 insertions, 9 deletions
diff --git a/src/t_set.c b/src/t_set.c
index 6d8023f31..7b0dbed5a 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -122,17 +122,16 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
/* Avoid duping the string if it is an sds string. */
sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
dict *ht = set->ptr;
- dictEntry *de = dictAddRaw(ht,sdsval,NULL);
- if (de && sdsval == str) {
- /* String was added but we don't own this sds string. Dup it and
- * replace it in the dict entry. */
- dictSetKey(ht,de,sdsdup((sds)str));
- dictSetVal(ht,de,NULL);
- } else if (!de && sdsval != str) {
- /* String was already a member. Free our temporary sds copy. */
+ void *position = dictFindPositionForInsert(ht, sdsval, NULL);
+ if (position) {
+ /* Key doesn't already exist in the set. Add it but dup the key. */
+ if (sdsval == str) sdsval = sdsdup(sdsval);
+ dictInsertAtPosition(ht, sdsval, position);
+ } else if (sdsval != str) {
+ /* String is already a member. Free our temporary sds copy. */
sdsfree(sdsval);
}
- return (de != NULL);
+ return (position != NULL);
} else if (set->encoding == OBJ_ENCODING_LISTPACK) {
unsigned char *lp = set->ptr;
unsigned char *p = lpFirst(lp);