diff options
author | Viktor Söderqvist <viktor.soderqvist@est.tech> | 2023-01-20 17:45:29 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-20 18:45:29 +0200 |
commit | f3f6f7c0d66f136146a912e06c8fbe31ecfbc977 (patch) | |
tree | 246b79d2ca0ac0d5420ad6a86512702ae7957b75 /src/defrag.c | |
parent | b4123663c31aaf1e97f4dbd630cbd7b8d0e91e31 (diff) | |
download | redis-f3f6f7c0d66f136146a912e06c8fbe31ecfbc977.tar.gz |
Key as dict entry - memory optimization for sets (#11595)
If a dict has only keys, and no use of values, then a key can be stored directly in a
dict's hashtable. The key replaces the dictEntry. To distinguish between a key and
a dictEntry, we only use this optimization if the key is odd, i.e. if the key has the least
significant bit set. This is true for sds strings, since the sds header is always an odd
number of bytes.
Dict entries are used as a fallback when there is a hash collision. A special dict entry
without a value (only key and next) is used so we save one word in this case too.
This saves 24 bytes per set element for larges sets, and also gains some speed improvement
as a side effect (less allocations and cache misses).
A quick test adding 1M elements to a set using the command below resulted in memory
usage of 28.83M, compared to 46.29M on unstable.
That's 18 bytes per set element on average.
eval 'for i=1,1000000,1 do redis.call("sadd", "myset", "x"..i) end' 0
Other changes:
Allocations are ensured to have at least 8 bits alignment on all systems. This affects 32-bit
builds compiled without HAVE_MALLOC_SIZE (not jemalloc or glibc) in which Redis
stores the size of each allocation, after this change in 8 bytes instead of previously 4 bytes
per allocation. This is done so we can reliably use the 3 least significant bits in a pointer to
encode stuff.
Diffstat (limited to 'src/defrag.c')
-rw-r--r-- | src/defrag.c | 105 |
1 files changed, 34 insertions, 71 deletions
diff --git a/src/defrag.c b/src/defrag.c index b813ab74f..5b7fdb775 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -238,46 +238,26 @@ void activeDefragZsetEntry(zset *zs, dictEntry *de) { #define DEFRAG_SDS_DICT_VAL_VOID_PTR 3 #define DEFRAG_SDS_DICT_VAL_LUA_SCRIPT 4 -typedef struct { - dict *dict; - int val_type; -} activeDefragSdsDictData; - -void activeDefragSdsDictCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - activeDefragSdsDictData *data = privdata; - dict *d = data->dict; - int val_type = data->val_type; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(d, de, newsds); - /* defrag the value */ - if (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS) { - sdsele = dictGetVal(de); - if ((newsds = activeDefragSds(sdsele))) - dictSetVal(d, de, newsds); - } else if (val_type == DEFRAG_SDS_DICT_VAL_IS_STROB) { - robj *newele, *ele = dictGetVal(de); - if ((newele = activeDefragStringOb(ele))) - dictSetVal(d, de, newele); - } else if (val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) { - void *newptr, *ptr = dictGetVal(de); - if ((newptr = activeDefragAlloc(ptr))) - dictSetVal(d, de, newptr); - } else if (val_type == DEFRAG_SDS_DICT_VAL_LUA_SCRIPT) { - void *newptr, *ptr = dictGetVal(de); - if ((newptr = activeDefragLuaScript(ptr))) - dictSetVal(d, de, newptr); - } +void activeDefragSdsDictCallback(void *privdata, const dictEntry *de) { + UNUSED(privdata); + UNUSED(de); } /* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */ void activeDefragSdsDict(dict* d, int val_type) { - activeDefragSdsDictData data = {d, val_type}; unsigned long cursor = 0; + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds, + .defragVal = (val_type == DEFRAG_SDS_DICT_VAL_IS_SDS ? (dictDefragAllocFunction *)activeDefragSds : + val_type == DEFRAG_SDS_DICT_VAL_IS_STROB ? (dictDefragAllocFunction *)activeDefragStringOb : + val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR ? (dictDefragAllocFunction *)activeDefragAlloc : + val_type == DEFRAG_SDS_DICT_VAL_LUA_SCRIPT ? (dictDefragAllocFunction *)activeDefragLuaScript : + NULL) + }; do { cursor = dictScanDefrag(d, cursor, activeDefragSdsDictCallback, - activeDefragAlloc, &data); + &defragfns, NULL); } while (cursor != 0); } @@ -407,19 +387,14 @@ void scanLaterZset(robj *ob, unsigned long *cursor) { zset *zs = (zset*)ob->ptr; dict *d = zs->dict; scanLaterZsetData data = {zs}; - *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, activeDefragAlloc, &data); + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; + *cursor = dictScanDefrag(d, *cursor, scanLaterZsetCallback, &defragfns, &data); } -typedef struct { - dict *dict; -} scanLaterDictData; - -void scanLaterSetCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - scanLaterDictData *data = privdata; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(data->dict, de, newsds); +/* Used as scan callback when all the work is done in the dictDefragFunctions. */ +void scanCallbackCountScanned(void *privdata, const dictEntry *de) { + UNUSED(privdata); + UNUSED(de); server.stat_active_defrag_scanned++; } @@ -427,28 +402,23 @@ void scanLaterSet(robj *ob, unsigned long *cursor) { if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT) return; dict *d = ob->ptr; - scanLaterDictData data = {d}; - *cursor = dictScanDefrag(d, *cursor, scanLaterSetCallback, activeDefragAlloc, &data); -} - -void scanLaterHashCallback(void *privdata, const dictEntry *_de) { - dictEntry *de = (dictEntry*)_de; - scanLaterDictData *data = privdata; - sds sdsele = dictGetKey(de), newsds; - if ((newsds = activeDefragSds(sdsele))) - dictSetKey(data->dict, de, newsds); - sdsele = dictGetVal(de); - if ((newsds = activeDefragSds(sdsele))) - dictSetVal(data->dict, de, newsds); - server.stat_active_defrag_scanned++; + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds + }; + *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL); } void scanLaterHash(robj *ob, unsigned long *cursor) { if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT) return; dict *d = ob->ptr; - scanLaterDictData data = {d}; - *cursor = dictScanDefrag(d, *cursor, scanLaterHashCallback, activeDefragAlloc, &data); + dictDefragFunctions defragfns = { + .defragAlloc = activeDefragAlloc, + .defragKey = (dictDefragAllocFunction *)activeDefragSds, + .defragVal = (dictDefragAllocFunction *)activeDefragSds + }; + *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL); } void defragQuicklist(redisDb *db, dictEntry *kde) { @@ -788,14 +758,6 @@ void defragScanCallback(void *privdata, const dictEntry *de) { server.stat_active_defrag_scanned++; } -/* Dummy scan callback used when defragging the expire dictionary. We only - * defrag the entries, which is done per bucket. */ -void defragExpireScanCallback(void *privdata, const dictEntry *de) { - UNUSED(privdata); - UNUSED(de); - server.stat_active_defrag_scanned++; -} - /* Utility function to get the fragmentation ratio from jemalloc. * It is critical to do that by comparing only heap maps that belong to * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this @@ -1003,6 +965,7 @@ void activeDefragCycle(void) { endtime = start + timelimit; latencyStartMonitor(latency); + dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc}; do { /* if we're not continuing a scan from the last call or loop, start a new one */ if (!cursor && !expires_cursor) { @@ -1055,13 +1018,13 @@ void activeDefragCycle(void) { /* Scan the keyspace dict unless we're scanning the expire dict. */ if (!expires_cursor) cursor = dictScanDefrag(db->dict, cursor, defragScanCallback, - activeDefragAlloc, db); + &defragfns, db); /* When done scanning the keyspace dict, we scan the expire dict. */ if (!cursor) expires_cursor = dictScanDefrag(db->expires, expires_cursor, - defragExpireScanCallback, - activeDefragAlloc, NULL); + scanCallbackCountScanned, + &defragfns, NULL); /* Once in 16 scan iterations, 512 pointer reallocations. or 64 keys * (if we have a lot of pointers in one hash bucket or rehashing), |