summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorViktor Söderqvist <viktor.soderqvist@est.tech>2022-11-09 18:50:07 +0100
committerGitHub <noreply@github.com>2022-11-09 19:50:07 +0200
commit4e472a1a7fc0dc2c7da2b48ac7342e9385b4f92a (patch)
tree62715e6a8fb51264449aa6ef0866f92d9d1e00bf
parent07d187066a86a9a3124af740373bf5bf49e345ff (diff)
downloadredis-4e472a1a7fc0dc2c7da2b48ac7342e9385b4f92a.tar.gz
Listpack encoding for sets (#11290)
Small sets with not only integer elements are listpack encoded, by default up to 128 elements, max 64 bytes per element, new config `set-max-listpack-entries` and `set-max-listpack-value`. This saves memory for small sets compared to using a hashtable. Sets with only integers, even very small sets, are still intset encoded (up to 1G limit, etc.). Larger sets are hashtable encoded. This PR increments the RDB version, and has an effect on OBJECT ENCODING Possible conversions when elements are added: intset -> listpack listpack -> hashtable intset -> hashtable Note: No conversion happens when elements are deleted. If all elements are deleted and then added again, the set is deleted and recreated, thus implicitly converted to a smaller encoding.
-rw-r--r--redis.conf9
-rw-r--r--src/aof.c67
-rw-r--r--src/config.c2
-rw-r--r--src/db.c6
-rw-r--r--src/defrag.c10
-rw-r--r--src/intset.c13
-rw-r--r--src/intset.h2
-rw-r--r--src/listpack.c260
-rw-r--r--src/listpack.h4
-rw-r--r--src/object.c10
-rw-r--r--src/rdb.c87
-rw-r--r--src/rdb.h5
-rw-r--r--src/redis-check-rdb.c3
-rw-r--r--src/server.h8
-rw-r--r--src/t_set.c727
-rw-r--r--src/t_zset.c26
-rw-r--r--tests/unit/keyspace.tcl45
-rw-r--r--tests/unit/scan.tcl7
-rw-r--r--tests/unit/type/set.tcl185
19 files changed, 1132 insertions, 344 deletions
diff --git a/redis.conf b/redis.conf
index dfc9de308..408d0c258 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1951,13 +1951,20 @@ list-max-listpack-size -2
# etc.
list-compress-depth 0
-# Sets have a special encoding in just one case: when a set is composed
+# Sets have a special encoding when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512
+# Sets containing non-integer values are also encoded using a memory efficient
+# data structure when they have a small number of entries, and the biggest entry
+# does not exceed a given threshold. These thresholds can be configured using
+# the following directives.
+set-max-listpack-entries 128
+set-max-listpack-value 64
+
# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
diff --git a/src/aof.c b/src/aof.c
index 52026ab24..f1bf9a1a6 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
* The function returns 0 on error, 1 on success. */
int rewriteSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = setTypeSize(o);
-
- if (o->encoding == OBJ_ENCODING_INTSET) {
- int ii = 0;
- int64_t llval;
-
- while(intsetGet(o->ptr,ii++,&llval)) {
- if (count == 0) {
- int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
- AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
- !rioWriteBulkString(r,"SADD",4) ||
- !rioWriteBulkObject(r,key))
- {
- return 0;
- }
+ setTypeIterator *si = setTypeInitIterator(o);
+ char *str;
+ size_t len;
+ int64_t llval;
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (count == 0) {
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
+ if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
+ !rioWriteBulkString(r,"SADD",4) ||
+ !rioWriteBulkObject(r,key))
+ {
+ return 0;
}
- if (!rioWriteBulkLongLong(r,llval)) return 0;
- if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
}
- } else if (o->encoding == OBJ_ENCODING_HT) {
- dictIterator *di = dictGetIterator(o->ptr);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- sds ele = dictGetKey(de);
- if (count == 0) {
- int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
- AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
- !rioWriteBulkString(r,"SADD",4) ||
- !rioWriteBulkObject(r,key))
- {
- dictReleaseIterator(di);
- return 0;
- }
- }
- if (!rioWriteBulkString(r,ele,sdslen(ele))) {
- dictReleaseIterator(di);
- return 0;
- }
- if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
+ size_t written = str ?
+ rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
+ if (!written) {
+ setTypeReleaseIterator(si);
+ return 0;
}
- dictReleaseIterator(di);
- } else {
- serverPanic("Unknown set encoding");
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ items--;
}
+ setTypeReleaseIterator(si);
return 1;
}
diff --git a/src/config.c b/src/config.c
index b11e4dda4..c5eab41a9 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3130,6 +3130,8 @@ standardConfig static_configs[] = {
/* Size_t configs */
createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
diff --git a/src/db.c b/src/db.c
index a560ce052..5ebd21e4b 100644
--- a/src/db.c
+++ b/src/db.c
@@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
} while (cursor &&
maxiterations-- &&
listLength(keys) < (unsigned long)count);
- } else if (o->type == OBJ_SET) {
+ } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) {
int pos = 0;
int64_t ll;
while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
- } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
+ } else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) &&
+ o->encoding == OBJ_ENCODING_LISTPACK)
+ {
unsigned char *p = lpFirst(o->ptr);
unsigned char *vstr;
int64_t vlen;
diff --git a/src/defrag.c b/src/defrag.c
index ced4fd20a..e78c07929 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) {
} else if (ob->type == OBJ_SET) {
if (ob->encoding == OBJ_ENCODING_HT) {
defragged += defragSet(db, de);
- } else if (ob->encoding == OBJ_ENCODING_INTSET) {
- intset *newis, *is = ob->ptr;
- if ((newis = activeDefragAlloc(is)))
- defragged++, ob->ptr = newis;
+ } else if (ob->encoding == OBJ_ENCODING_INTSET ||
+ ob->encoding == OBJ_ENCODING_LISTPACK)
+ {
+ void *newptr, *ptr = ob->ptr;
+ if ((newptr = activeDefragAlloc(ptr)))
+ defragged++, ob->ptr = newptr;
} else {
serverPanic("Unknown set encoding");
}
diff --git a/src/intset.c b/src/intset.c
index e96037da8..621a74283 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%len);
}
+/* Return the largest member. */
+int64_t intsetMax(intset *is) {
+ uint32_t len = intrev32ifbe(is->length);
+ return _intsetGet(is, len - 1);
+}
+
+/* Return the smallest member. */
+int64_t intsetMin(intset *is) {
+ return _intsetGet(is, 0);
+}
+
/* Get the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) {
is = intsetAdd(is,6,&success); assert(success);
is = intsetAdd(is,4,&success); assert(success);
is = intsetAdd(is,4,&success); assert(!success);
+ assert(6 == intsetMax(is));
+ assert(4 == intsetMin(is));
ok();
zfree(is);
}
diff --git a/src/intset.h b/src/intset.h
index 772f847b6..41cc7b822 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
+int64_t intsetMax(intset *is);
+int64_t intsetMin(intset *is);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(const intset *is);
size_t intsetBlobLen(intset *is);
diff --git a/src/listpack.c b/src/listpack.c
index db9a1b4ab..bfa364f98 100644
--- a/src/listpack.c
+++ b/src/listpack.c
@@ -1063,6 +1063,55 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {
return lp;
}
+/* Delete the elements 'ps' passed as an array of 'count' element pointers and
+ * return the resulting listpack. The elements must be given in the same order
+ * as they apper in the listpack. */
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count) {
+ if (count == 0) return lp;
+ unsigned char *dst = ps[0];
+ size_t total_bytes = lpGetTotalBytes(lp);
+ unsigned char *lp_end = lp + total_bytes; /* After the EOF element. */
+ assert(lp_end[-1] == LP_EOF);
+ /*
+ * ----+--------+-----------+--------+---------+-----+---+
+ * ... | Delete | Keep | Delete | Keep | ... |EOF|
+ * ... |xxxxxxxx| |xxxxxxxx| | ... | |
+ * ----+--------+-----------+--------+---------+-----+---+
+ * ^ ^ ^ ^
+ * | | | |
+ * ps[i] | ps[i+1] |
+ * skip keep_start keep_end lp_end
+ *
+ * The loop memmoves the bytes between keep_start and keep_end to dst.
+ */
+ for (unsigned long i = 0; i < count; i++) {
+ unsigned char *skip = ps[i];
+ assert(skip != NULL && skip[0] != LP_EOF);
+ unsigned char *keep_start = lpSkip(skip);
+ unsigned char *keep_end;
+ if (i + 1 < count) {
+ keep_end = ps[i + 1];
+ /* Deleting consecutive elements. Nothing to keep between them. */
+ if (keep_start == keep_end) continue;
+ } else {
+ /* Keep the rest of the listpack including the EOF marker. */
+ keep_end = lp_end;
+ }
+ assert(keep_end > keep_start);
+ size_t bytes_to_keep = keep_end - keep_start;
+ memmove(dst, keep_start, bytes_to_keep);
+ dst += bytes_to_keep;
+ }
+ /* Update total size and num elements. */
+ size_t deleted_bytes = lp_end - dst;
+ total_bytes -= deleted_bytes;
+ assert(lp[total_bytes - 1] == LP_EOF);
+ lpSetTotalBytes(lp, total_bytes);
+ uint32_t numele = lpGetNumElements(lp);
+ if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - count);
+ return lpShrinkToFit(lp);
+}
+
/* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.
*
* NOTE: The larger listpack is reallocated to contain the new merged listpack.
@@ -1383,6 +1432,43 @@ void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *k
val->sval = lpGetValue(p, &(val->slen), &(val->lval));
}
+/* Randomly select 'count' entries and store them in the 'entries' array, which
+ * needs to have space for 'count' listpackEntry structs. The order is random
+ * and duplicates are possible. */
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries) {
+ struct pick {
+ unsigned int index;
+ unsigned int order;
+ } *picks = lp_malloc(count * sizeof(struct pick));
+ unsigned int total_size = lpLength(lp);
+ assert(total_size);
+ for (unsigned int i = 0; i < count; i++) {
+ picks[i].index = rand() % total_size;
+ picks[i].order = i;
+ }
+
+ /* Sort by index. */
+ qsort(picks, count, sizeof(struct pick), uintCompare);
+
+ /* Iterate over listpack in index order and store the values in the entries
+ * array respecting the original order. */
+ unsigned char *p = lpFirst(lp);
+ unsigned int j = 0; /* index in listpack */
+ for (unsigned int i = 0; i < count; i++) {
+ /* Advance listpack pointer to until we reach 'index' listpack. */
+ while (j < picks[i].index) {
+ p = lpNext(lp, p);
+ j++;
+ }
+ int storeorder = picks[i].order;
+ unsigned int len = 0;
+ long long llval = 0;
+ unsigned char *str = lpGetValue(p, &len, &llval);
+ lpSaveValue(str, len, llval, &entries[storeorder]);
+ }
+ lp_free(picks);
+}
+
/* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The order of the picked entries is random, and the selections
* are non-unique (repetitions are possible).
@@ -1449,34 +1535,83 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack
if (count > total_size)
count = total_size;
- /* To only iterate once, every time we try to pick a member, the probability
- * we pick it is the quotient of the count left we want to pick and the
- * count still we haven't visited in the dict, this way, we could make every
- * member be equally picked.*/
p = lpFirst(lp);
unsigned int picked = 0, remaining = count;
while (picked < count && p) {
- double randomDouble = ((double)rand()) / RAND_MAX;
- double threshold = ((double)remaining) / (total_size - index);
- if (randomDouble <= threshold) {
+ assert((p = lpNextRandom(lp, p, &index, remaining, 1)));
+ key = lpGetValue(p, &klen, &klval);
+ lpSaveValue(key, klen, klval, &keys[picked]);
+ assert((p = lpNext(lp, p)));
+ index++;
+ if (vals) {
key = lpGetValue(p, &klen, &klval);
- lpSaveValue(key, klen, klval, &keys[picked]);
- assert((p = lpNext(lp, p)));
- if (vals) {
- key = lpGetValue(p, &klen, &klval);
- lpSaveValue(key, klen, klval, &vals[picked]);
- }
- remaining--;
- picked++;
- } else {
- assert((p = lpNext(lp, p)));
+ lpSaveValue(key, klen, klval, &vals[picked]);
}
p = lpNext(lp, p);
+ remaining--;
+ picked++;
index++;
}
return picked;
}
+/* Iterates forward to the "next random" element, given we are yet to pick
+ * 'remaining' unique elements between the starting element 'p' (inclusive) and
+ * the end of the list. The 'index' needs to be initialized according to the
+ * current zero-based index matching the position of the starting element 'p'
+ * and is updated to match the returned element's zero-based index. If
+ * 'even_only' is nonzero, an element with an even index is picked, which is
+ * useful if the listpack represents a key-value pair sequence.
+ *
+ * Note that this function can return p. In order to skip the previously
+ * returned element, you need to call lpNext() or lpDelete() after each call to
+ * lpNextRandom(). Idea:
+ *
+ * assert(remaining <= lpLength(lp));
+ * p = lpFirst(lp);
+ * i = 0;
+ * while (remaining > 0) {
+ * p = lpNextRandom(lp, p, &i, remaining--, 0);
+ *
+ * // ... Do stuff with p ...
+ *
+ * p = lpNext(lp, p);
+ * i++;
+ * }
+ */
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+ unsigned int remaining, int even_only)
+{
+ /* To only iterate once, every time we try to pick a member, the probability
+ * we pick it is the quotient of the count left we want to pick and the
+ * count still we haven't visited. This way, we could make every member be
+ * equally likely to be picked. */
+ unsigned int i = *index;
+ unsigned int total_size = lpLength(lp);
+ while (i < total_size && p != NULL) {
+ if (even_only && i % 2 != 0) {
+ p = lpNext(lp, p);
+ i++;
+ continue;
+ }
+
+ /* Do we pick this element? */
+ unsigned int available = total_size - i;
+ if (even_only) available /= 2;
+ double randomDouble = ((double)rand()) / RAND_MAX;
+ double threshold = ((double)remaining) / available;
+ if (randomDouble <= threshold) {
+ *index = i;
+ return p;
+ }
+
+ p = lpNext(lp, p);
+ i++;
+ }
+
+ return NULL;
+}
+
/* Print info of listpack which is used in debugCommand */
void lpRepr(unsigned char *lp) {
unsigned char *p, *vstr;
@@ -1902,6 +2037,21 @@ int listpackTest(int argc, char *argv[], int flags) {
zfree(lp);
}
+ TEST("Batch delete") {
+ unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */
+ assert(lpLength(lp) == 4); /* Pre-condition */
+ unsigned char *p0 = lpFirst(lp),
+ *p1 = lpNext(lp, p0),
+ *p2 = lpNext(lp, p1),
+ *p3 = lpNext(lp, p2);
+ unsigned char *ps[] = {p0, p1, p3};
+ lp = lpBatchDelete(lp, ps, 3);
+ assert(lpLength(lp) == 1);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+ assert(lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL) == 1);
+ lpFree(lp);
+ }
+
TEST("Delete foo while iterating") {
lp = createList();
p = lpFirst(lp);
@@ -2048,6 +2198,82 @@ int listpackTest(int argc, char *argv[], int flags) {
zfree(lp3);
}
+ TEST("lpNextRandom normal usage") {
+ /* Create some data */
+ unsigned char *lp = lpNew(0);
+ unsigned char buf[100] = "asdf";
+ unsigned int size = 100;
+ for (size_t i = 0; i < size; i++) {
+ lp = lpAppend(lp, buf, i);
+ }
+ assert(lpLength(lp) == size);
+
+ /* Pick a subset of the elements of every possible subset size */
+ for (unsigned int count = 0; count <= size; count++) {
+ unsigned int remaining = count;
+ unsigned char *p = lpFirst(lp);
+ unsigned char *prev = NULL;
+ unsigned index = 0;
+ while (remaining > 0) {
+ assert(p != NULL);
+ p = lpNextRandom(lp, p, &index, remaining--, 0);
+ assert(p != NULL);
+ assert(p != prev);
+ prev = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ }
+ }
+
+ TEST("lpNextRandom corner cases") {
+ unsigned char *lp = lpNew(0);
+ unsigned i = 0;
+
+ /* Pick from empty listpack returns NULL. */
+ assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL);
+
+ /* Add some elements and find their pointers within the listpack. */
+ lp = lpAppend(lp, (unsigned char *)"abc", 3);
+ lp = lpAppend(lp, (unsigned char *)"def", 3);
+ lp = lpAppend(lp, (unsigned char *)"ghi", 3);
+ assert(lpLength(lp) == 3);
+ unsigned char *p0 = lpFirst(lp);
+ unsigned char *p1 = lpNext(lp, p0);
+ unsigned char *p2 = lpNext(lp, p1);
+ assert(lpNext(lp, p2) == NULL);
+
+ /* Pick zero elements returns NULL. */
+ i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL);
+
+ /* Pick all returns all. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1);
+ i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2);
+
+ /* Pick more than one when there's only one left returns the last one. */
+ i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2);
+
+ /* Pick all even elements returns p0 and p2. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2);
+
+ /* Don't crash even for bad index. */
+ for (int j = 0; j < 100; j++) {
+ unsigned char *p;
+ switch (j % 4) {
+ case 0: p = p0; break;
+ case 1: p = p1; break;
+ case 2: p = p2; break;
+ case 3: p = NULL; break;
+ }
+ i = j % 7;
+ unsigned int remaining = j % 5;
+ p = lpNextRandom(lp, p, &i, remaining, 0);
+ assert(p == p0 || p == p1 || p == p2 || p == NULL);
+ }
+ }
+
TEST("Random pair with one element") {
listpackEntry key, val;
unsigned char *lp = lpNew(0);
diff --git a/src/listpack.h b/src/listpack.h
index 3e750af5b..ce27ea367 100644
--- a/src/listpack.h
+++ b/src/listpack.h
@@ -70,6 +70,7 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
unsigned char *lpMerge(unsigned char **first, unsigned char **second);
unsigned long lpLength(unsigned char *lp);
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
@@ -90,6 +91,9 @@ unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+ unsigned int remaining, int even_only);
int lpSafeToAdd(unsigned char* lp, size_t add);
void lpRepr(unsigned char *lp);
diff --git a/src/object.c b/src/object.c
index 44fd5befd..d48af4128 100644
--- a/src/object.c
+++ b/src/object.c
@@ -247,6 +247,13 @@ robj *createIntsetObject(void) {
return o;
}
+robj *createSetListpackObject(void) {
+ unsigned char *lp = lpNew(0);
+ robj *o = createObject(OBJ_SET, lp);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
robj *createHashObject(void) {
unsigned char *zl = lpNew(0);
robj *o = createObject(OBJ_HASH, zl);
@@ -306,6 +313,7 @@ void freeSetObject(robj *o) {
dictRelease((dict*) o->ptr);
break;
case OBJ_ENCODING_INTSET:
+ case OBJ_ENCODING_LISTPACK:
zfree(o->ptr);
break;
default:
@@ -441,6 +449,8 @@ void dismissSetObject(robj *o, size_t size_hint) {
dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*));
} else if (o->encoding == OBJ_ENCODING_INTSET) {
dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
} else {
serverPanic("Unknown set encoding type");
}
diff --git a/src/rdb.c b/src/rdb.c
index 083e5cf89..bbb61aafb 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -665,6 +665,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
else if (o->encoding == OBJ_ENCODING_HT)
return rdbSaveType(rdb,RDB_TYPE_SET);
+ else if (o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK);
else
serverPanic("Unknown set encoding");
case OBJ_ZSET:
@@ -858,6 +860,10 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ size_t l = lpBytes((unsigned char *)o->ptr);
+ if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1;
+ nwritten += n;
} else {
serverPanic("Unknown set encoding");
}
@@ -1690,19 +1696,21 @@ static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int he
}
/* callback for to check the listpack doesn't have duplicate records */
-static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
struct {
+ int pairs;
long count;
dict *fields;
} *data = userdata;
if (data->fields == NULL) {
data->fields = dictCreate(&hashDictType);
- dictExpand(data->fields, head_count/2);
+ dictExpand(data->fields, data->pairs ? head_count/2 : head_count);
}
- /* Even records are field names, add to dict and check that's not a dup */
- if (((data->count) & 1) == 0) {
+ /* If we're checking pairs, then even records are field names. Otherwise
+ * we're checking all elements. Add to dict and check that's not a dup */
+ if (!data->pairs || ((data->count) & 1) == 0) {
unsigned char *str;
int64_t slen;
unsigned char buf[LP_INTBUF_SIZE];
@@ -1722,21 +1730,24 @@ static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, vo
/* Validate the integrity of the listpack structure.
* when `deep` is 0, only the integrity of the header is validated.
- * when `deep` is 1, we scan all the entries one by one. */
-int lpPairsValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep) {
+ * when `deep` is 1, we scan all the entries one by one.
+ * when `pairs` is 0, all elements need to be unique (it's a set)
+ * when `pairs` is 1, odd elements need to be unique (it's a key-value map) */
+int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) {
if (!deep)
return lpValidateIntegrity(lp, size, 0, NULL, NULL);
/* Keep track of the field names to locate duplicate ones */
struct {
+ int pairs;
long count;
dict *fields; /* Initialisation at the first callback. */
- } data = {0, NULL};
+ } data = {pairs, 0, NULL};
- int ret = lpValidateIntegrity(lp, size, 1, _lpPairsEntryValidation, &data);
+ int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data);
/* make sure we have an even number of records. */
- if (data.count & 1)
+ if (pairs && data.count & 1)
ret = 0;
if (data.fields) dictRelease(data.fields);
@@ -1813,6 +1824,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
/* Load every single element of the set */
+ size_t maxelelen = 0, sumelelen = 0;
for (i = 0; i < len; i++) {
long long llval;
sds sdsele;
@@ -1821,6 +1833,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
decrRefCount(o);
return NULL;
}
+ size_t elelen = sdslen(sdsele);
+ sumelelen += elelen;
+ if (elelen > maxelelen) maxelelen = elelen;
if (o->encoding == OBJ_ENCODING_INTSET) {
/* Fetch integer value from element. */
@@ -1833,6 +1848,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
sdsfree(sdsele);
return NULL;
}
+ } else if (setTypeSize(o) < server.set_max_listpack_entries &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, sumelelen))
+ {
+ /* We checked if it's safe to add one large element instead
+ * of many small ones. It's OK since lpSafeToAdd doesn't
+ * care about individual elements, only the total size. */
+ setTypeConvert(o, OBJ_ENCODING_LISTPACK);
} else {
setTypeConvert(o,OBJ_ENCODING_HT);
if (dictTryExpand(o->ptr,len) != DICT_OK) {
@@ -1845,6 +1868,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
/* This will also be called when the set was just converted
+ * to a listpack encoded set. */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ if (setTypeSize(o) < server.set_max_listpack_entries &&
+ elelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(o->ptr, elelen))
+ {
+ unsigned char *p = lpFirst(o->ptr);
+ if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
+ } else {
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ if (dictTryExpand(o->ptr, len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+ (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+ }
+
+ /* This will also be called when the set was just converted
* to a regular hash table encoded set. */
if (o->encoding == OBJ_ENCODING_HT) {
if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) {
@@ -2126,6 +2176,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
} else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
rdbtype == RDB_TYPE_LIST_ZIPLIST ||
rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_SET_LISTPACK ||
rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
rdbtype == RDB_TYPE_ZSET_LISTPACK ||
rdbtype == RDB_TYPE_HASH_ZIPLIST ||
@@ -2243,6 +2294,20 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
if (intsetLen(o->ptr) > server.set_max_intset_entries)
setTypeConvert(o,OBJ_ENCODING_HT);
break;
+ case RDB_TYPE_SET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) {
+ rdbReportCorruptRDB("Set listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (setTypeSize(o) > server.set_max_listpack_entries)
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ break;
case RDB_TYPE_ZSET_ZIPLIST:
{
unsigned char *lp = lpNew(encoded_len);
@@ -2272,7 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
case RDB_TYPE_ZSET_LISTPACK:
if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
- if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
rdbReportCorruptRDB("Zset listpack integrity check failed.");
zfree(encoded);
o->ptr = NULL;
@@ -2318,7 +2383,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
case RDB_TYPE_HASH_LISTPACK:
if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
- if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
rdbReportCorruptRDB("Hash listpack integrity check failed.");
zfree(encoded);
o->ptr = NULL;
diff --git a/src/rdb.h b/src/rdb.h
index d215334db..4350d3a28 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -38,7 +38,7 @@
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define RDB_VERSION 10
+#define RDB_VERSION 11
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -95,10 +95,11 @@
#define RDB_TYPE_ZSET_LISTPACK 17
#define RDB_TYPE_LIST_QUICKLIST_2 18
#define RDB_TYPE_STREAM_LISTPACKS_2 19
+#define RDB_TYPE_SET_LISTPACK 20
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 19))
+#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 20))
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
#define RDB_OPCODE_FUNCTION2 245 /* function library data */
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
index cedfd84cb..a537c6dae 100644
--- a/src/redis-check-rdb.c
+++ b/src/redis-check-rdb.c
@@ -97,7 +97,8 @@ char *rdb_type_string[] = {
"stream",
"hash-listpack",
"zset-listpack",
- "quicklist-v2"
+ "quicklist-v2",
+ "set-listpack",
};
/* Show a few stats collected into 'rdbstate' */
diff --git a/src/server.h b/src/server.h
index 4dfaaea7b..c96c1c743 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1850,6 +1850,8 @@ struct redisServer {
size_t hash_max_listpack_entries;
size_t hash_max_listpack_value;
size_t set_max_intset_entries;
+ size_t set_max_listpack_entries;
+ size_t set_max_listpack_value;
size_t zset_max_listpack_entries;
size_t zset_max_listpack_value;
size_t hll_sparse_max_bytes;
@@ -2331,6 +2333,7 @@ typedef struct {
int encoding;
int ii; /* intset iterator */
dictIterator *di;
+ unsigned char *lpi; /* listpack iterator */
} setTypeIterator;
/* Structure to hold hash iteration abstraction. Note that iteration over
@@ -2655,6 +2658,7 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
robj *createQuicklistObject(void);
robj *createSetObject(void);
robj *createIntsetObject(void);
+robj *createSetListpackObject(void);
robj *createHashObject(void);
robj *createZsetObject(void);
robj *createZsetListpackObject(void);
@@ -2980,9 +2984,9 @@ int setTypeRemove(robj *subject, sds value);
int setTypeIsMember(robj *subject, sds value);
setTypeIterator *setTypeInitIterator(robj *subject);
void setTypeReleaseIterator(setTypeIterator *si);
-int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele);
sds setTypeNextObject(setTypeIterator *si);
-int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
unsigned long setTypeSize(const robj *subject);
void setTypeConvert(robj *subject, int enc);
diff --git a/src/t_set.c b/src/t_set.c
index 8a669fe7c..8ab602791 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -29,6 +29,12 @@
#include "server.h"
+/* Internal prototypes */
+
+int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeRemoveAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+
/*-----------------------------------------------------------------------------
* Set Commands
*----------------------------------------------------------------------------*/
@@ -42,45 +48,154 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj *setTypeCreate(sds value) {
if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
return createIntsetObject();
- return createSetObject();
+ return createSetListpackObject();
+}
+
+/* Return the maximum number of entries to store in an intset. */
+static size_t intsetMaxEntries(void) {
+ size_t max_entries = server.set_max_intset_entries;
+ /* limit to 1G entries due to intset internals. */
+ if (max_entries >= 1<<30) max_entries = 1<<30;
+ return max_entries;
}
-/* Add the specified value into a set.
+/* Converts intset to HT if it contains too many entries. */
+static void maybeConvertIntset(robj *subject) {
+ serverAssert(subject->encoding == OBJ_ENCODING_INTSET);
+ if (intsetLen(subject->ptr) > intsetMaxEntries())
+ setTypeConvert(subject,OBJ_ENCODING_HT);
+}
+
+/* When you know all set elements are integers, call this to convert the set to
+ * an intset. No conversion happens if the set contains too many entries for an
+ * intset. */
+static void maybeConvertToIntset(robj *set) {
+ if (set->encoding == OBJ_ENCODING_INTSET) return; /* already intset */
+ if (setTypeSize(set) > intsetMaxEntries()) return; /* can't use intset */
+ intset *is = intsetNew();
+ char *str;
+ size_t len;
+ int64_t llval;
+ setTypeIterator *si = setTypeInitIterator(set);
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (str) {
+ /* If the element is returned as a string, we may be able to convert
+ * it to integer. This happens for OBJ_ENCODING_HT. */
+ serverAssert(string2ll(str, len, (long long *)&llval));
+ }
+ uint8_t success = 0;
+ is = intsetAdd(is, llval, &success);
+ serverAssert(success);
+ }
+ setTypeReleaseIterator(si);
+ freeSetObject(set); /* frees the internals but not robj itself */
+ set->ptr = is;
+ set->encoding = OBJ_ENCODING_INTSET;
+}
+
+/* Add the specified sds value into a set.
*
* If the value was already member of the set, nothing is done and 0 is
* returned, otherwise the new element is added and 1 is returned. */
int setTypeAdd(robj *subject, sds value) {
- long long llval;
- if (subject->encoding == OBJ_ENCODING_HT) {
- dict *ht = subject->ptr;
- dictEntry *de = dictAddRaw(ht,value,NULL);
- if (de) {
- dictSetKey(ht,de,sdsdup(value));
+ return setTypeAddAux(subject, value, sdslen(value), 0, 1);
+}
+
+/* Add member. This function is optimized for the different encodings. The
+ * value can be provided as an sds string (indicated by passing str_is_sds =
+ * 1), as string and length (str_is_sds = 0) or as an integer in which case str
+ * is set to NULL and llval is provided instead.
+ *
+ * Returns 1 if the value was added and 0 if it was already a member. */
+int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (set->encoding == OBJ_ENCODING_INTSET) {
+ uint8_t success = 0;
+ set->ptr = intsetAdd(set->ptr, llval, &success);
+ if (success) maybeConvertIntset(set);
+ return success;
+ }
+ /* Convert int to string. */
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ serverAssert(str);
+ if (set->encoding == OBJ_ENCODING_HT) {
+ /* Avoid duping the string if it is an sds string. */
+ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
+ dict *ht = set->ptr;
+ dictEntry *de = dictAddRaw(ht,sdsval,NULL);
+ if (de && sdsval == str) {
+ /* String was added but we don't own this sds string. Dup it and
+ * replace it in the dict entry. */
+ dictSetKey(ht,de,sdsdup((sds)str));
dictSetVal(ht,de,NULL);
+ } else if (!de && sdsval != str) {
+ /* String was already a member. Free our temporary sds copy. */
+ sdsfree(sdsval);
+ }
+ return (de != NULL);
+ } else if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ if (p != NULL)
+ p = lpFind(lp, p, (unsigned char*)str, len, 0);
+ if (p == NULL) {
+ /* Not found. */
+ if (lpLength(lp) < server.set_max_listpack_entries &&
+ len <= server.set_max_listpack_value &&
+ lpSafeToAdd(lp, len))
+ {
+ if (str == tmpbuf) {
+ /* This came in as integer so we can avoid parsing it again.
+ * TODO: Create and use lpFindInteger; don't go via string. */
+ lp = lpAppendInteger(lp, llval);
+ } else {
+ lp = lpAppend(lp, (unsigned char*)str, len);
+ }
+ set->ptr = lp;
+ } else {
+ /* Size limit is reached. Convert to hashtable and add. */
+ setTypeConvert(set, OBJ_ENCODING_HT);
+ serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
+ }
return 1;
}
- } else if (subject->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
+ } else if (set->encoding == OBJ_ENCODING_INTSET) {
+ long long value;
+ if (string2ll(str, len, &value)) {
uint8_t success = 0;
- subject->ptr = intsetAdd(subject->ptr,llval,&success);
+ set->ptr = intsetAdd(set->ptr,value,&success);
if (success) {
- /* Convert to regular set when the intset contains
- * too many entries. */
- size_t max_entries = server.set_max_intset_entries;
- /* limit to 1G entries due to intset internals. */
- if (max_entries >= 1<<30) max_entries = 1<<30;
- if (intsetLen(subject->ptr) > max_entries)
- setTypeConvert(subject,OBJ_ENCODING_HT);
+ maybeConvertIntset(set);
return 1;
}
} else {
- /* Failed to get integer from object, convert to regular set. */
- setTypeConvert(subject,OBJ_ENCODING_HT);
-
- /* The set *was* an intset and this value is not integer
- * encodable, so dictAdd should always work. */
- serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
- return 1;
+ size_t maxelelen = intsetLen(set->ptr) == 0 ?
+ 0 : max(sdigits10(intsetMax(set->ptr)),
+ sdigits10(intsetMin(set->ptr)));
+ if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries &&
+ len <= server.set_max_listpack_value &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, maxelelen * intsetLen(set->ptr) + len))
+ {
+ /* In the "safe to add" check above we assumed all elements in
+ * the intset are of size maxelelen. This is an upper bound. */
+ setTypeConvert(set, OBJ_ENCODING_LISTPACK);
+ unsigned char *lp = set->ptr;
+ lp = lpAppend(lp, (unsigned char *)str, len);
+ set->ptr = lp;
+ return 1;
+ } else {
+ setTypeConvert(set, OBJ_ENCODING_HT);
+ /* The set *was* an intset and this value is not integer
+ * encodable, so dictAdd should always work. */
+ serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
+ return 1;
+ }
}
} else {
serverPanic("Unknown set encoding");
@@ -88,15 +203,50 @@ int setTypeAdd(robj *subject, sds value) {
return 0;
}
+/* Deletes a value provided as an sds string from the set. Returns 1 if the
+ * value was deleted and 0 if it was not a member of the set. */
int setTypeRemove(robj *setobj, sds value) {
- long long llval;
+ return setTypeRemoveAux(setobj, value, sdslen(value), 0, 1);
+}
+
+/* Remove a member. This function is optimized for the different encodings. The
+ * value can be provided as an sds string (indicated by passing str_is_sds =
+ * 1), as string and length (str_is_sds = 0) or as an integer in which case str
+ * is set to NULL and llval is provided instead.
+ *
+ * Returns 1 if the value was deleted and 0 if it was not a member of the set. */
+int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ int success;
+ setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
+ return success;
+ }
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
if (setobj->encoding == OBJ_ENCODING_HT) {
- if (dictDelete(setobj->ptr,value) == DICT_OK) {
- if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
+ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
+ int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
+ if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
+ if (sdsval != str) sdsfree(sdsval); /* free temp copy */
+ return deleted;
+ } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = setobj->ptr;
+ unsigned char *p = lpFirst(lp);
+ if (p == NULL) return 0;
+ p = lpFind(lp, p, (unsigned char*)str, len, 0);
+ if (p != NULL) {
+ lp = lpDelete(lp, p, NULL);
+ setobj->ptr = lp;
return 1;
}
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
+ long long llval;
+ if (string2ll(str, len, &llval)) {
int success;
setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
if (success) return 1;
@@ -107,18 +257,45 @@ int setTypeRemove(robj *setobj, sds value) {
return 0;
}
+/* Check if an sds string is a member of the set. Returns 1 if the value is a
+ * member of the set and 0 if it isn't. */
int setTypeIsMember(robj *subject, sds value) {
- long long llval;
- if (subject->encoding == OBJ_ENCODING_HT) {
- return dictFind((dict*)subject->ptr,value) != NULL;
- } else if (subject->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
- return intsetFind((intset*)subject->ptr,llval);
- }
+ return setTypeIsMemberAux(subject, value, sdslen(value), 0, 1);
+}
+
+/* Membership checking optimized for the different encodings. The value can be
+ * provided as an sds string (indicated by passing str_is_sds = 1), as string
+ * and length (str_is_sds = 0) or as an integer in which case str is set to NULL
+ * and llval is provided instead.
+ *
+ * Returns 1 if the value is a member of the set and 0 if it isn't. */
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (set->encoding == OBJ_ENCODING_INTSET)
+ return intsetFind(set->ptr, llval);
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ return p && lpFind(lp, p, (unsigned char*)str, len, 0);
+ } else if (set->encoding == OBJ_ENCODING_INTSET) {
+ long long llval;
+ return string2ll(str, len, &llval) && intsetFind(set->ptr, llval);
+ } else if (set->encoding == OBJ_ENCODING_HT && str_is_sds) {
+ return dictFind(set->ptr, (sds)str) != NULL;
+ } else if (set->encoding == OBJ_ENCODING_HT) {
+ sds sdsval = sdsnewlen(str, len);
+ int result = dictFind(set->ptr, sdsval) != NULL;
+ sdsfree(sdsval);
+ return result;
} else {
serverPanic("Unknown set encoding");
}
- return 0;
}
setTypeIterator *setTypeInitIterator(robj *subject) {
@@ -129,6 +306,8 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
si->di = dictGetIterator(subject->ptr);
} else if (si->encoding == OBJ_ENCODING_INTSET) {
si->ii = 0;
+ } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
+ si->lpi = NULL;
} else {
serverPanic("Unknown set encoding");
}
@@ -142,28 +321,50 @@ void setTypeReleaseIterator(setTypeIterator *si) {
}
/* Move to the next entry in the set. Returns the object at the current
- * position.
+ * position, as a string or as an integer.
*
- * Since set elements can be internally be stored as SDS strings or
+ * Since set elements can be internally be stored as SDS strings, char buffers or
* simple arrays of integers, setTypeNext returns the encoding of the
- * set object you are iterating, and will populate the appropriate pointer
- * (sdsele) or (llele) accordingly.
+ * set object you are iterating, and will populate the appropriate pointers
+ * (str and len) or (llele) depending on whether the value is stored as a string
+ * or as an integer internally.
+ *
+ * If OBJ_ENCODING_HT is returned, then str points to an sds string and can be
+ * used as such. If OBJ_ENCODING_INTSET, then llele is populated and str is
+ * pointed to NULL. If OBJ_ENCODING_LISTPACK is returned, the value can be
+ * either a string or an integer. If *str is not NULL, then str and len are
+ * populated with the string content and length. Otherwise, llele populated with
+ * an integer value.
*
- * Note that both the sdsele and llele pointers should be passed and cannot
+ * Note that str, len and llele pointers should all be passed and cannot
* be NULL since the function will try to defensively populate the non
* used field with values which are easy to trap if misused.
*
- * When there are no longer elements -1 is returned. */
-int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
+ * When there are no more elements -1 is returned. */
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
if (si->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictNext(si->di);
if (de == NULL) return -1;
- *sdsele = dictGetKey(de);
+ *str = dictGetKey(de);
+ *len = sdslen(*str);
*llele = -123456789; /* Not needed. Defensive. */
} else if (si->encoding == OBJ_ENCODING_INTSET) {
if (!intsetGet(si->subject->ptr,si->ii++,llele))
return -1;
- *sdsele = NULL; /* Not needed. Defensive. */
+ *str = NULL;
+ } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = si->subject->ptr;
+ unsigned char *lpi = si->lpi;
+ if (lpi == NULL) {
+ lpi = lpFirst(lp);
+ } else {
+ lpi = lpNext(lp, lpi);
+ }
+ if (lpi == NULL) return -1;
+ si->lpi = lpi;
+ unsigned int l;
+ *str = (char *)lpGetValue(lpi, &l, (long long *)llele);
+ *len = (size_t)l;
} else {
serverPanic("Wrong set encoding in setTypeNext");
}
@@ -179,54 +380,85 @@ int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
* an issue. */
sds setTypeNextObject(setTypeIterator *si) {
int64_t intele;
- sds sdsele;
- int encoding;
+ char *str;
+ size_t len;
- encoding = setTypeNext(si,&sdsele,&intele);
- switch(encoding) {
- case -1: return NULL;
- case OBJ_ENCODING_INTSET:
- return sdsfromlonglong(intele);
- case OBJ_ENCODING_HT:
- return sdsdup(sdsele);
- default:
- serverPanic("Unsupported encoding");
- }
- return NULL; /* just to suppress warnings */
+ if (setTypeNext(si, &str, &len, &intele) == -1) return NULL;
+ if (str != NULL) return sdsnewlen(str, len);
+ return sdsfromlonglong(intele);
}
/* Return random element from a non empty set.
* The returned element can be an int64_t value if the set is encoded
- * as an "intset" blob of integers, or an SDS string if the set
- * is a regular set.
+ * as an "intset" blob of integers, or an string.
*
- * The caller provides both pointers to be populated with the right
+ * The caller provides three pointers to be populated with the right
* object. The return value of the function is the object->encoding
- * field of the object and is used by the caller to check if the
- * int64_t pointer or the sds pointer was populated.
+ * field of the object and can be used by the caller to check if the
+ * int64_t pointer or the str and len pointers were populated, as for
+ * setTypeNext. If OBJ_ENCODING_HT is returned, str is pointed to a
+ * string which is actually an sds string and it can be used as such.
*
- * Note that both the sdsele and llele pointers should be passed and cannot
- * be NULL since the function will try to defensively populate the non
- * used field with values which are easy to trap if misused. */
-int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
+ * Note that both the str, len and llele pointers should be passed and cannot
+ * be NULL. If str is set to NULL, the value is an integer stored in llele. */
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele) {
if (setobj->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictGetFairRandomKey(setobj->ptr);
- *sdsele = dictGetKey(de);
+ *str = dictGetKey(de);
+ *len = sdslen(*str);
*llele = -123456789; /* Not needed. Defensive. */
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
*llele = intsetRandom(setobj->ptr);
- *sdsele = NULL; /* Not needed. Defensive. */
+ *str = NULL; /* Not needed. Defensive. */
+ } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = setobj->ptr;
+ int r = rand() % lpLength(lp);
+ unsigned char *p = lpSeek(lp, r);
+ unsigned int l;
+ *str = (char *)lpGetValue(p, &l, (long long *)llele);
+ *len = (size_t)l;
} else {
serverPanic("Unknown set encoding");
}
return setobj->encoding;
}
+/* Pops a random element and returns it as an object. */
+robj *setTypePopRandom(robj *set) {
+ robj *obj;
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ /* Find random and delete it without re-seeking the listpack. */
+ unsigned int i = 0;
+ unsigned char *p = lpNextRandom(set->ptr, lpFirst(set->ptr), &i, 1, 0);
+ unsigned int len = 0; /* initialize to silence warning */
+ long long llele = 0; /* initialize to silence warning */
+ char *str = (char *)lpGetValue(p, &len, &llele);
+ if (str)
+ obj = createStringObject(str, len);
+ else
+ obj = createStringObjectFromLongLong(llele);
+ set->ptr = lpDelete(set->ptr, p, NULL);
+ } else {
+ char *str;
+ size_t len = 0;
+ int64_t llele = 0;
+ int encoding = setTypeRandomElement(set, &str, &len, &llele);
+ if (str)
+ obj = createStringObject(str, len);
+ else
+ obj = createStringObjectFromLongLong(llele);
+ setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
+ }
+ return obj;
+}
+
unsigned long setTypeSize(const robj *subject) {
if (subject->encoding == OBJ_ENCODING_HT) {
return dictSize((const dict*)subject->ptr);
} else if (subject->encoding == OBJ_ENCODING_INTSET) {
return intsetLen((const intset*)subject->ptr);
+ } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+ return lpLength((unsigned char *)subject->ptr);
} else {
serverPanic("Unknown set encoding");
}
@@ -238,27 +470,44 @@ unsigned long setTypeSize(const robj *subject) {
void setTypeConvert(robj *setobj, int enc) {
setTypeIterator *si;
serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
- setobj->encoding == OBJ_ENCODING_INTSET);
+ setobj->encoding != enc);
if (enc == OBJ_ENCODING_HT) {
- int64_t intele;
dict *d = dictCreate(&setDictType);
sds element;
/* Presize the dict to avoid rehashing */
- dictExpand(d,intsetLen(setobj->ptr));
+ dictExpand(d, setTypeSize(setobj));
/* To add the elements we extract integers and create redis objects */
si = setTypeInitIterator(setobj);
- while (setTypeNext(si,&element,&intele) != -1) {
- element = sdsfromlonglong(intele);
+ while ((element = setTypeNextObject(si)) != NULL) {
serverAssert(dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
+ freeSetObject(setobj); /* frees the internals but not setobj itself */
setobj->encoding = OBJ_ENCODING_HT;
- zfree(setobj->ptr);
setobj->ptr = d;
+ } else if (enc == OBJ_ENCODING_LISTPACK) {
+ /* Preallocate the minimum one byte per element */
+ size_t estcap = setTypeSize(setobj);
+ unsigned char *lp = lpNew(estcap);
+ char *str;
+ size_t len;
+ int64_t llele;
+ si = setTypeInitIterator(setobj);
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str != NULL)
+ lp = lpAppend(lp, (unsigned char *)str, len);
+ else
+ lp = lpAppendInteger(lp, llele);
+ }
+ setTypeReleaseIterator(si);
+
+ freeSetObject(setobj); /* frees the internals but not setobj itself */
+ setobj->encoding = OBJ_ENCODING_LISTPACK;
+ setobj->ptr = lp;
} else {
serverPanic("Unsupported set conversion");
}
@@ -272,8 +521,6 @@ void setTypeConvert(robj *setobj, int enc) {
robj *setTypeDup(robj *o) {
robj *set;
setTypeIterator *si;
- sds elesds;
- int64_t intobj;
serverAssert(o->type == OBJ_SET);
@@ -285,13 +532,23 @@ robj *setTypeDup(robj *o) {
memcpy(newis,is,size);
set = createObject(OBJ_SET, newis);
set->encoding = OBJ_ENCODING_INTSET;
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = o->ptr;
+ size_t sz = lpBytes(lp);
+ unsigned char *new_lp = zmalloc(sz);
+ memcpy(new_lp, lp, sz);
+ set = createObject(OBJ_SET, new_lp);
+ set->encoding = OBJ_ENCODING_LISTPACK;
} else if (o->encoding == OBJ_ENCODING_HT) {
set = createSetObject();
dict *d = o->ptr;
dictExpand(set->ptr, dictSize(d));
si = setTypeInitIterator(o);
- while (setTypeNext(si, &elesds, &intobj) != -1) {
- setTypeAdd(set, elesds);
+ char *str;
+ size_t len;
+ int64_t intobj;
+ while (setTypeNext(si, &str, &len, &intobj) != -1) {
+ setTypeAdd(set, (sds)str);
}
setTypeReleaseIterator(si);
} else {
@@ -509,9 +766,9 @@ void spopWithCountCommand(client *c) {
addReplySetLen(c,count);
/* Common iteration vars. */
- sds sdsele;
robj *objele;
- int encoding;
+ char *str;
+ size_t len;
int64_t llele;
unsigned long remaining = size-count; /* Elements left after SPOP. */
@@ -522,24 +779,49 @@ void spopWithCountCommand(client *c) {
* CASE 2: The number of elements to return is small compared to the
* set size. We can just extract random elements and return them to
* the set. */
- if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
- while(count--) {
- /* Emit and remove. */
- encoding = setTypeRandomElement(set,&sdsele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
- addReplyBulkLongLong(c,llele);
- objele = createStringObjectFromLongLong(llele);
- set->ptr = intsetRemove(set->ptr,llele,NULL);
+ if (remaining*SPOP_MOVE_STRATEGY_MUL > count &&
+ set->encoding == OBJ_ENCODING_LISTPACK)
+ {
+ /* Specialized case for listpack. Traverse it only once. */
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int index = 0;
+ unsigned char **ps = zmalloc(sizeof(char *) * count);
+ for (unsigned long i = 0; i < count; i++) {
+ p = lpNextRandom(lp, p, &index, count - i, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+
+ if (str) {
+ addReplyBulkCBuffer(c, str, len);
+ objele = createStringObject(str, len);
} else {
- addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
- objele = createStringObject(sdsele,sdslen(sdsele));
- setTypeRemove(set,sdsele);
+ addReplyBulkLongLong(c, llele);
+ objele = createStringObjectFromLongLong(llele);
}
/* Replicate/AOF this command as an SREM operation */
propargv[2] = objele;
alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL);
decrRefCount(objele);
+
+ /* Store pointer for later deletion and move to next. */
+ ps[i] = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ lp = lpBatchDelete(lp, ps, count);
+ zfree(ps);
+ set->ptr = lp;
+ } else if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+ while(count--) {
+ objele = setTypePopRandom(set);
+ addReplyBulk(c, objele);
+
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
}
} else {
/* CASE 3: The number of elements to return is very big, approaching
@@ -553,29 +835,46 @@ void spopWithCountCommand(client *c) {
robj *newset = NULL;
/* Create a new set with just the remaining elements. */
- while(remaining--) {
- encoding = setTypeRandomElement(set,&sdsele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
- sdsele = sdsfromlonglong(llele);
- } else {
- sdsele = sdsdup(sdsele);
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ /* Specialized case for listpack. Traverse it only once. */
+ newset = createSetListpackObject();
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int index = 0;
+ unsigned char **ps = zmalloc(sizeof(char *) * remaining);
+ for (unsigned long i = 0; i < remaining; i++) {
+ p = lpNextRandom(lp, p, &index, remaining - i, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+ setTypeAddAux(newset, str, len, llele, 0);
+ ps[i] = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ lp = lpBatchDelete(lp, ps, remaining);
+ zfree(ps);
+ set->ptr = lp;
+ } else {
+ while(remaining--) {
+ int encoding = setTypeRandomElement(set, &str, &len, &llele);
+ if (!newset) {
+ newset = str ? createSetListpackObject() : createIntsetObject();
+ }
+ setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HT);
+ setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
}
- if (!newset) newset = setTypeCreate(sdsele);
- setTypeAdd(newset,sdsele);
- setTypeRemove(set,sdsele);
- sdsfree(sdsele);
}
/* Transfer the old set to the client. */
setTypeIterator *si;
si = setTypeInitIterator(set);
- while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) {
- if (encoding == OBJ_ENCODING_INTSET) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
objele = createStringObjectFromLongLong(llele);
} else {
- addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
- objele = createStringObject(sdsele,sdslen(sdsele));
+ addReplyBulkCBuffer(c, str, len);
+ objele = createStringObject(str, len);
}
/* Replicate/AOF this command as an SREM operation */
@@ -599,9 +898,6 @@ void spopWithCountCommand(client *c) {
void spopCommand(client *c) {
robj *set, *ele;
- sds sdsele;
- int64_t llele;
- int encoding;
if (c->argc == 3) {
spopWithCountCommand(c);
@@ -616,17 +912,8 @@ void spopCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
- /* Get a random element from the set */
- encoding = setTypeRandomElement(set,&sdsele,&llele);
-
- /* Remove the element from the set */
- if (encoding == OBJ_ENCODING_INTSET) {
- ele = createStringObjectFromLongLong(llele);
- set->ptr = intsetRemove(set->ptr,llele,NULL);
- } else {
- ele = createStringObject(sdsele,sdslen(sdsele));
- setTypeRemove(set,ele->ptr);
- }
+ /* Pop a random element from the set */
+ ele = setTypePopRandom(set);
notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
@@ -634,7 +921,7 @@ void spopCommand(client *c) {
rewriteClientCommandVector(c,3,shared.srem,c->argv[1],ele);
/* Add the element to the reply */
- addReplyBulk(c,ele);
+ addReplyBulk(c, ele);
decrRefCount(ele);
/* Delete the set if it's empty */
@@ -661,9 +948,9 @@ void srandmemberWithCountCommand(client *c) {
unsigned long count, size;
int uniq = 1;
robj *set;
- sds ele;
+ char *str;
+ size_t len;
int64_t llele;
- int encoding;
dict *d;
@@ -694,12 +981,27 @@ void srandmemberWithCountCommand(client *c) {
* elements in random order. */
if (!uniq || count == 1) {
addReplyArrayLen(c,count);
+
+ if (set->encoding == OBJ_ENCODING_LISTPACK && count > 1) {
+ /* Specialized case for listpack, traversing it only once. */
+ listpackEntry *entries = zmalloc(count * sizeof(listpackEntry));
+ lpRandomEntries(set->ptr, count, entries);
+ for (unsigned long i = 0; i < count; i++) {
+ if (entries[i].sval)
+ addReplyBulkCBuffer(c, entries[i].sval, entries[i].slen);
+ else
+ addReplyBulkLongLong(c, entries[i].lval);
+ }
+ zfree(entries);
+ return;
+ }
+
while(count--) {
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
}
return;
@@ -712,11 +1014,11 @@ void srandmemberWithCountCommand(client *c) {
setTypeIterator *si;
addReplyArrayLen(c,size);
si = setTypeInitIterator(set);
- while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
- if (encoding == OBJ_ENCODING_INTSET) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
size--;
}
@@ -725,6 +1027,31 @@ void srandmemberWithCountCommand(client *c) {
return;
}
+ /* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
+ * Listpack encoded sets are meant to be relatively small, so
+ * SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
+ * copies of the entries. Instead, we emit them directly to the output
+ * buffer. */
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int i = 0;
+ addReplyArrayLen(c, count);
+ while (count) {
+ p = lpNextRandom(lp, p, &i, count--, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+ if (str == NULL) {
+ addReplyBulkLongLong(c, llele);
+ } else {
+ addReplyBulkCBuffer(c, str, len);
+ }
+ p = lpNext(lp, p);
+ i++;
+ }
+ return;
+ }
+
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
d = dictCreate(&sdsReplyDictType);
@@ -743,13 +1070,13 @@ void srandmemberWithCountCommand(client *c) {
/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
dictExpand(d, size);
- while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
int retval = DICT_ERR;
- if (encoding == OBJ_ENCODING_INTSET) {
+ if (str == NULL) {
retval = dictAdd(d,sdsfromlonglong(llele),NULL);
} else {
- retval = dictAdd(d,sdsdup(ele),NULL);
+ retval = dictAdd(d, sdsnewlen(str, len), NULL);
}
serverAssert(retval == DICT_OK);
}
@@ -777,11 +1104,11 @@ void srandmemberWithCountCommand(client *c) {
dictExpand(d, count);
while (added < count) {
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
sdsele = sdsfromlonglong(llele);
} else {
- sdsele = sdsdup(ele);
+ sdsele = sdsnewlen(str, len);
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
@@ -810,9 +1137,9 @@ void srandmemberWithCountCommand(client *c) {
/* SRANDMEMBER <key> [<count>] */
void srandmemberCommand(client *c) {
robj *set;
- sds ele;
+ char *str;
+ size_t len;
int64_t llele;
- int encoding;
if (c->argc == 3) {
srandmemberWithCountCommand(c);
@@ -826,11 +1153,11 @@ void srandmemberCommand(client *c) {
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
}
@@ -866,7 +1193,8 @@ void sinterGenericCommand(client *c, robj **setkeys,
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *dstset = NULL;
- sds elesds;
+ char *str;
+ size_t len;
int64_t intobj;
void *replylen = NULL;
unsigned long j, cardinality = 0;
@@ -918,7 +1246,24 @@ void sinterGenericCommand(client *c, robj **setkeys,
if (dstkey) {
/* If we have a target key where to store the resulting set
* create this key with an empty set inside */
- dstset = createIntsetObject();
+ if (sets[0]->encoding == OBJ_ENCODING_INTSET) {
+ /* The first set is an intset, so the result is an intset too. The
+ * elements are inserted in ascending order which is efficient in an
+ * intset. */
+ dstset = createIntsetObject();
+ } else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) {
+ /* To avoid many reallocs, we estimate that the result is a listpack
+ * of approximately the same size as the first set. Then we shrink
+ * it or possibly convert it to intset it in the end. */
+ unsigned char *lp = lpNew(lpBytes(sets[0]->ptr));
+ dstset = createObject(OBJ_SET, lp);
+ dstset->encoding = OBJ_ENCODING_LISTPACK;
+ } else {
+ /* We start off with a listpack, since it's more efficient to append
+ * to than an intset. Later we can convert it to intset or a
+ * hashtable. */
+ dstset = createSetListpackObject();
+ }
} else if (!cardinality_only) {
replylen = addReplyDeferredLen(c);
}
@@ -926,32 +1271,14 @@ void sinterGenericCommand(client *c, robj **setkeys,
/* Iterate all the elements of the first (smallest) set, and test
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
+ int only_integers = 1;
si = setTypeInitIterator(sets[0]);
- while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
+ while((encoding = setTypeNext(si, &str, &len, &intobj)) != -1) {
for (j = 1; j < setnum; j++) {
if (sets[j] == sets[0]) continue;
- if (encoding == OBJ_ENCODING_INTSET) {
- /* intset with intset is simple... and fast */
- if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
- !intsetFind((intset*)sets[j]->ptr,intobj))
- {
- break;
- /* in order to compare an integer with an object we
- * have to use the generic function, creating an object
- * for this */
- } else if (sets[j]->encoding == OBJ_ENCODING_HT) {
- elesds = sdsfromlonglong(intobj);
- if (!setTypeIsMember(sets[j],elesds)) {
- sdsfree(elesds);
- break;
- }
- sdsfree(elesds);
- }
- } else if (encoding == OBJ_ENCODING_HT) {
- if (!setTypeIsMember(sets[j],elesds)) {
- break;
- }
- }
+ if (!setTypeIsMemberAux(sets[j], str, len, intobj,
+ encoding == OBJ_ENCODING_HT))
+ break;
}
/* Only take action when all sets contain the member */
@@ -963,19 +1290,29 @@ void sinterGenericCommand(client *c, robj **setkeys,
if (limit && cardinality >= limit)
break;
} else if (!dstkey) {
- if (encoding == OBJ_ENCODING_HT)
- addReplyBulkCBuffer(c,elesds,sdslen(elesds));
+ if (str != NULL)
+ addReplyBulkCBuffer(c, str, len);
else
addReplyBulkLongLong(c,intobj);
cardinality++;
} else {
- if (encoding == OBJ_ENCODING_INTSET) {
- elesds = sdsfromlonglong(intobj);
- setTypeAdd(dstset,elesds);
- sdsfree(elesds);
- } else {
- setTypeAdd(dstset,elesds);
+ if (str && only_integers) {
+ /* It may be an integer although we got it as a string. */
+ if (encoding == OBJ_ENCODING_HT &&
+ string2ll(str, len, (long long *)&intobj))
+ {
+ if (dstset->encoding == OBJ_ENCODING_LISTPACK ||
+ dstset->encoding == OBJ_ENCODING_INTSET)
+ {
+ /* Adding it as an integer is more efficient. */
+ str = NULL;
+ }
+ } else {
+ /* It's not an integer */
+ only_integers = 0;
+ }
}
+ setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HT);
}
}
}
@@ -987,6 +1324,12 @@ void sinterGenericCommand(client *c, robj **setkeys,
/* Store the resulting set into the target, if the intersection
* is not an empty set. */
if (setTypeSize(dstset) > 0) {
+ if (only_integers) maybeConvertToIntset(dstset);
+ if (dstset->encoding == OBJ_ENCODING_LISTPACK) {
+ /* We allocated too much memory when we created it to avoid
+ * frequent reallocs. Therefore, we shrink it now. */
+ dstset->ptr = lpShrinkToFit(dstset->ptr);
+ }
setKey(c,c->db,dstkey,dstset,0);
addReplyLongLong(c,setTypeSize(dstset));
notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
@@ -1054,7 +1397,10 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *dstset = NULL;
- sds ele;
+ char *str;
+ size_t len;
+ int64_t llval;
+ int encoding;
int j, cardinality = 0;
int diff_algo = 1;
int sameset = 0;
@@ -1120,9 +1466,8 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
- while((ele = setTypeNextObject(si)) != NULL) {
- if (setTypeAdd(dstset,ele)) cardinality++;
- sdsfree(ele);
+ while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
+ cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
}
setTypeReleaseIterator(si);
}
@@ -1138,18 +1483,19 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
* This way we perform at max N*M operations, where N is the size of
* the first set, and M the number of sets. */
si = setTypeInitIterator(sets[0]);
- while((ele = setTypeNextObject(si)) != NULL) {
+ while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
for (j = 1; j < setnum; j++) {
if (!sets[j]) continue; /* no key is an empty set. */
if (sets[j] == sets[0]) break; /* same set! */
- if (setTypeIsMember(sets[j],ele)) break;
+ if (setTypeIsMemberAux(sets[j], str, len, llval,
+ encoding == OBJ_ENCODING_HT))
+ break;
}
if (j == setnum) {
/* There is no other set with this element. Add it. */
- setTypeAdd(dstset,ele);
+ setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
cardinality++;
}
- sdsfree(ele);
}
setTypeReleaseIterator(si);
} else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
@@ -1164,13 +1510,14 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
- while((ele = setTypeNextObject(si)) != NULL) {
+ while((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
if (j == 0) {
- if (setTypeAdd(dstset,ele)) cardinality++;
+ cardinality += setTypeAddAux(dstset, str, len, llval,
+ encoding == OBJ_ENCODING_HT);
} else {
- if (setTypeRemove(dstset,ele)) cardinality--;
+ cardinality -= setTypeRemoveAux(dstset, str, len, llval,
+ encoding == OBJ_ENCODING_HT);
}
- sdsfree(ele);
}
setTypeReleaseIterator(si);
@@ -1184,9 +1531,11 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!dstkey) {
addReplySetLen(c,cardinality);
si = setTypeInitIterator(dstset);
- while((ele = setTypeNextObject(si)) != NULL) {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
- sdsfree(ele);
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (str)
+ addReplyBulkCBuffer(c, str, len);
+ else
+ addReplyBulkLongLong(c, llval);
}
setTypeReleaseIterator(si);
server.lazyfree_lazy_server_del ? freeObjAsync(NULL, dstset, -1) :
diff --git a/src/t_zset.c b/src/t_zset.c
index e194860db..ed1830175 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1971,6 +1971,10 @@ typedef struct {
dictIterator *di;
dictEntry *de;
} ht;
+ struct {
+ unsigned char *lp;
+ unsigned char *p;
+ } lp;
} set;
/* Sorted set iterators. */
@@ -2025,6 +2029,9 @@ void zuiInitIterator(zsetopsrc *op) {
it->ht.dict = op->subject->ptr;
it->ht.di = dictGetIterator(op->subject->ptr);
it->ht.de = dictNext(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ it->lp.lp = op->subject->ptr;
+ it->lp.p = lpFirst(it->lp.lp);
} else {
serverPanic("Unknown set encoding");
}
@@ -2061,6 +2068,8 @@ void zuiClearIterator(zsetopsrc *op) {
UNUSED(it); /* skip */
} else if (op->encoding == OBJ_ENCODING_HT) {
dictReleaseIterator(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ UNUSED(it);
} else {
serverPanic("Unknown set encoding");
}
@@ -2091,14 +2100,7 @@ unsigned long zuiLength(zsetopsrc *op) {
return 0;
if (op->type == OBJ_SET) {
- if (op->encoding == OBJ_ENCODING_INTSET) {
- return intsetLen(op->subject->ptr);
- } else if (op->encoding == OBJ_ENCODING_HT) {
- dict *ht = op->subject->ptr;
- return dictSize(ht);
- } else {
- serverPanic("Unknown set encoding");
- }
+ return setTypeSize(op->subject);
} else if (op->type == OBJ_ZSET) {
if (op->encoding == OBJ_ENCODING_LISTPACK) {
return zzlLength(op->subject->ptr);
@@ -2144,6 +2146,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->ht.de = dictNext(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ if (it->lp.p == NULL)
+ return 0;
+ val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell);
+ val->score = 1.0;
+
+ /* Move to next element. */
+ it->lp.p = lpNext(it->lp.lp, it->lp.p);
} else {
serverPanic("Unknown set encoding");
}
diff --git a/tests/unit/keyspace.tcl b/tests/unit/keyspace.tcl
index f5f971140..97ffef6bd 100644
--- a/tests/unit/keyspace.tcl
+++ b/tests/unit/keyspace.tcl
@@ -256,30 +256,27 @@ start_server {tags {"keyspace"}} {
assert_equal $digest [debug_digest_value mynewlist{t}]
}
- test {COPY basic usage for intset set} {
- r del set1{t} newset1{t}
- r sadd set1{t} 1 2 3
- assert_encoding intset set1{t}
- r copy set1{t} newset1{t}
- set digest [debug_digest_value set1{t}]
- assert_equal $digest [debug_digest_value newset1{t}]
- assert_refcount 1 set1{t}
- assert_refcount 1 newset1{t}
- r del set1{t}
- assert_equal $digest [debug_digest_value newset1{t}]
- }
-
- test {COPY basic usage for hashtable set} {
- r del set2{t} newset2{t}
- r sadd set2{t} 1 2 3 a
- assert_encoding hashtable set2{t}
- r copy set2{t} newset2{t}
- set digest [debug_digest_value set2{t}]
- assert_equal $digest [debug_digest_value newset2{t}]
- assert_refcount 1 set2{t}
- assert_refcount 1 newset2{t}
- r del set2{t}
- assert_equal $digest [debug_digest_value newset2{t}]
+ foreach type {intset listpack hashtable} {
+ test {COPY basic usage for $type set} {
+ r del set1{t} newset1{t}
+ r sadd set1{t} 1 2 3
+ if {$type ne "intset"} {
+ r sadd set1{t} a
+ }
+ if {$type eq "hashtable"} {
+ for {set i 4} {$i < 200} {incr i} {
+ r sadd set1{t} $i
+ }
+ }
+ assert_encoding $type set1{t}
+ r copy set1{t} newset1{t}
+ set digest [debug_digest_value set1{t}]
+ assert_equal $digest [debug_digest_value newset1{t}]
+ assert_refcount 1 set1{t}
+ assert_refcount 1 newset1{t}
+ r del set1{t}
+ assert_equal $digest [debug_digest_value newset1{t}]
+ }
}
test {COPY basic usage for listpack sorted set} {
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 5dea76be7..45397d7a3 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -98,7 +98,7 @@ start_server {tags {"scan network"}} {
assert_equal 1000 [llength $keys]
}
- foreach enc {intset hashtable} {
+ foreach enc {intset listpack hashtable} {
test "SSCAN with encoding $enc" {
# Create the Set
r del set
@@ -107,8 +107,9 @@ start_server {tags {"scan network"}} {
} else {
set prefix "ele:"
}
+ set count [expr {$enc eq "hashtable" ? 200 : 100}]
set elements {}
- for {set j 0} {$j < 100} {incr j} {
+ for {set j 0} {$j < $count} {incr j} {
lappend elements ${prefix}${j}
}
r sadd set {*}$elements
@@ -128,7 +129,7 @@ start_server {tags {"scan network"}} {
}
set keys [lsort -unique $keys]
- assert_equal 100 [llength $keys]
+ assert_equal $count [llength $keys]
}
}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 30b6dc5d7..34c2ea76d 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -2,6 +2,8 @@ start_server {
tags {"set"}
overrides {
"set-max-intset-entries" 512
+ "set-max-listpack-entries" 128
+ "set-max-listpack-value" 32
}
} {
proc create_set {key entries} {
@@ -9,12 +11,19 @@ start_server {
foreach entry $entries { r sadd $key $entry }
}
- test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - regular set} {
- create_set myset {foo}
- assert_encoding hashtable myset
+ # Values for initialing sets, per encoding.
+ array set initelems {listpack {foo} hashtable {foo}}
+ for {set i 0} {$i < 130} {incr i} {
+ lappend initelems(hashtable) [format "i%03d" $i]
+ }
+
+ foreach type {listpack hashtable} {
+ test "SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - $type" {
+ create_set myset $initelems($type)
+ assert_encoding $type myset
assert_equal 1 [r sadd myset bar]
assert_equal 0 [r sadd myset bar]
- assert_equal 2 [r scard myset]
+ assert_equal [expr [llength $initelems($type)] + 1] [r scard myset]
assert_equal 1 [r sismember myset foo]
assert_equal 1 [r sismember myset bar]
assert_equal 0 [r sismember myset bla]
@@ -23,7 +32,8 @@ start_server {
assert_equal {1 0} [r smismember myset foo bla]
assert_equal {0 1} [r smismember myset bla foo]
assert_equal {0} [r smismember myset bla]
- assert_equal {bar foo} [lsort [r smembers myset]]
+ assert_equal "bar $initelems($type)" [lsort [r smembers myset]]
+ }
}
test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - intset} {
@@ -67,15 +77,33 @@ start_server {
assert_error WRONGTYPE* {r sadd mylist bar}
}
- test "SADD a non-integer against an intset" {
+ test "SADD a non-integer against a small intset" {
create_set myset {1 2 3}
assert_encoding intset myset
assert_equal 1 [r sadd myset a]
+ assert_encoding listpack myset
+ }
+
+ test "SADD a non-integer against a large intset" {
+ create_set myset {0}
+ for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+ assert_encoding intset myset
+ assert_equal 1 [r sadd myset a]
assert_encoding hashtable myset
}
test "SADD an integer larger than 64 bits" {
create_set myset {213244124402402314402033402}
+ assert_encoding listpack myset
+ assert_equal 1 [r sismember myset 213244124402402314402033402]
+ assert_equal {1} [r smismember myset 213244124402402314402033402]
+ }
+
+ test "SADD an integer larger than 64 bits to a large intset" {
+ create_set myset {0}
+ for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+ assert_encoding intset myset
+ r sadd myset 213244124402402314402033402
assert_encoding hashtable myset
assert_equal 1 [r sismember myset 213244124402402314402033402]
assert_equal {1} [r smismember myset 213244124402402314402033402]
@@ -100,25 +128,32 @@ start_server {
r del myintset
r del myhashset
r del mylargeintset
+ r del mysmallset
for {set i 0} {$i < 100} {incr i} { r sadd myintset $i }
for {set i 0} {$i < 1280} {incr i} { r sadd mylargeintset $i }
+ for {set i 0} {$i < 50} {incr i} { r sadd mysmallset [format "i%03d" $i] }
for {set i 0} {$i < 256} {incr i} { r sadd myhashset [format "i%03d" $i] }
assert_encoding intset myintset
assert_encoding hashtable mylargeintset
+ assert_encoding listpack mysmallset
assert_encoding hashtable myhashset
r debug reload
assert_encoding intset myintset
assert_encoding hashtable mylargeintset
+ assert_encoding listpack mysmallset
assert_encoding hashtable myhashset
} {} {needs:debug}
- test {SREM basics - regular set} {
- create_set myset {foo bar ciao}
- assert_encoding hashtable myset
- assert_equal 0 [r srem myset qux]
- assert_equal 1 [r srem myset foo]
- assert_equal {bar ciao} [lsort [r smembers myset]]
+ foreach type {listpack hashtable} {
+ test {SREM basics - $type} {
+ create_set myset $initelems($type)
+ r sadd myset ciao
+ assert_encoding $type myset
+ assert_equal 0 [r srem myset qux]
+ assert_equal 1 [r srem myset ciao]
+ assert_equal $initelems($type) [lsort [r smembers myset]]
+ }
}
test {SREM basics - intset} {
@@ -177,7 +212,18 @@ start_server {
assert_equal 0 [r sintercard 1 non-existing-key limit 10]
}
- foreach {type} {hashtable intset} {
+ foreach {type} {regular intset} {
+ # Create sets setN{t} where N = 1..5
+ if {$type eq "regular"} {
+ set smallenc listpack
+ set bigenc hashtable
+ } else {
+ set smallenc intset
+ set bigenc intset
+ }
+ # Sets 1, 2 and 4 are big; sets 3 and 5 are small.
+ array set encoding "1 $bigenc 2 $bigenc 3 $smallenc 4 $bigenc 5 $smallenc"
+
for {set i 1} {$i <= 5} {incr i} {
r del [format "set%d{t}" $i]
}
@@ -198,7 +244,7 @@ start_server {
# while the tests are running -- an extra element is added to every
# set that determines its encoding.
set large 200
- if {$type eq "hashtable"} {
+ if {$type eq "regular"} {
set large foo
}
@@ -206,9 +252,9 @@ start_server {
r sadd [format "set%d{t}" $i] $large
}
- test "Generated sets must be encoded as $type" {
+ test "Generated sets must be encoded correctly - $type" {
for {set i 1} {$i <= 5} {incr i} {
- assert_encoding $type [format "set%d{t}" $i]
+ assert_encoding $encoding($i) [format "set%d{t}" $i]
}
}
@@ -225,14 +271,14 @@ start_server {
test "SINTERSTORE with two sets - $type" {
r sinterstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $smallenc setres{t}
assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
}
test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" {
r debug reload
r sinterstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $smallenc setres{t}
assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
} {} {needs:debug}
@@ -243,7 +289,7 @@ start_server {
test "SUNIONSTORE with two sets - $type" {
r sunionstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $bigenc setres{t}
set expected [lsort -uniq "[r smembers set1{t}] [r smembers set2{t}]"]
assert_equal $expected [lsort [r smembers setres{t}]]
}
@@ -294,6 +340,46 @@ start_server {
}
}
+ test "SINTERSTORE with two listpack sets where result is intset" {
+ r del setres{t} set1{t} set2{t}
+ r sadd set1{t} a b c 1 3 6 x y z
+ r sadd set2{t} e f g 1 2 3 u v w
+ assert_encoding listpack set1{t}
+ assert_encoding listpack set2{t}
+ r sinterstore setres{t} set1{t} set2{t}
+ assert_equal [list 1 3] [lsort [r smembers setres{t}]]
+ assert_encoding intset setres{t}
+ }
+
+ test "SINTERSTORE with two hashtable sets where result is intset" {
+ r del setres{t} set1{t} set2{t}
+ r sadd set1{t} a b c 444 555 666
+ r sadd set2{t} e f g 111 222 333
+ set expected {}
+ for {set i 1} {$i < 130} {incr i} {
+ r sadd set1{t} $i
+ r sadd set2{t} $i
+ lappend expected $i
+ }
+ assert_encoding hashtable set1{t}
+ assert_encoding hashtable set2{t}
+ r sinterstore setres{t} set1{t} set2{t}
+ assert_equal [lsort $expected] [lsort [r smembers setres{t}]]
+ assert_encoding intset setres{t}
+ }
+
+ test "SUNION hashtable and listpack" {
+ # This adds code coverage for adding a non-sds string to a hashtable set
+ # which already contains the string.
+ r del set1{t} set2{t}
+ set union {abcdefghijklmnopqrstuvwxyz1234567890 a b c 1 2 3}
+ create_set set1{t} $union
+ create_set set2{t} {a b c}
+ assert_encoding hashtable set1{t}
+ assert_encoding listpack set2{t}
+ assert_equal [lsort $union] [lsort [r sunion set1{t} set2{t}]]
+ }
+
test "SDIFF with first set empty" {
r del set1{t} set2{t} set3{t}
r sadd set2{t} 1 2 3 4
@@ -428,7 +514,7 @@ start_server {
r sadd set2{t} 1 2 3 a
r srem set2{t} a
assert_encoding intset set1{t}
- assert_encoding hashtable set2{t}
+ assert_encoding listpack set2{t}
lsort [r sinter set1{t} set2{t}]
} {1 2 3}
@@ -549,7 +635,7 @@ start_server {
assert_equal 0 [r exists setres{t}]
}
- foreach {type contents} {hashtable {a b c} intset {1 2 3}} {
+ foreach {type contents} {listpack {a b c} intset {1 2 3}} {
test "SPOP basics - $type" {
create_set myset $contents
assert_encoding $type myset
@@ -575,11 +661,20 @@ start_server {
}
}
+ test "SPOP integer from listpack set" {
+ create_set myset {a 1 2 3 4 5 6 7}
+ assert_encoding listpack myset
+ set a [r spop myset]
+ set b [r spop myset]
+ assert {[string is digit $a] || [string is digit $b]}
+ }
+
foreach {type contents} {
- hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+ listpack {a b c d e f g h i j k l m n o p q r s t u v w x y z}
intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+ hashtable {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 b c d e f g h i j k l m n o p q r s t u v w x y z}
} {
- test "SPOP with <count>" {
+ test "SPOP with <count> - $type" {
create_set myset $contents
assert_encoding $type myset
assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
@@ -610,16 +705,20 @@ start_server {
r spop nonexisting_key 100
} {}
- test "SPOP new implementation: code path #1" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ foreach {type content} {
+ intset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ listpack {a 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ } {
+ test "SPOP new implementation: code path #1 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 30]
assert {[lsort $content] eq [lsort $res]}
}
- test "SPOP new implementation: code path #2" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ test "SPOP new implementation: code path #2 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 2]
assert {[llength $res] == 2}
assert {[r scard myset] == 18}
@@ -627,15 +726,16 @@ start_server {
assert {[lsort $union] eq [lsort $content]}
}
- test "SPOP new implementation: code path #3" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ test "SPOP new implementation: code path #3 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 18]
assert {[llength $res] == 18}
assert {[r scard myset] == 2}
set union [concat [r smembers myset] $res]
assert {[lsort $union] eq [lsort $content]}
}
+ }
test "SRANDMEMBER count of 0 is handled correctly" {
r srandmember myset 0
@@ -659,7 +759,7 @@ start_server {
r readraw 0
foreach {type contents} {
- hashtable {
+ listpack {
1 5 10 50 125 50000 33959417 4775547 65434162
12098459 427716 483706 2726473884 72615637475
MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
@@ -674,9 +774,20 @@ start_server {
30 31 32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47 48 49
}
+ hashtable {
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+ 1 5 10 50 125 50000 33959417 4775547 65434162
+ 12098459 427716 483706 2726473884 72615637475
+ MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
+ SUSAN MARGARET DOROTHY LISA NANCY KAREN BETTY HELEN
+ SANDRA DONNA CAROL RUTH SHARON MICHELLE LAURA SARAH
+ KIMBERLY DEBORAH JESSICA SHIRLEY CYNTHIA ANGELA MELISSA
+ BRENDA AMY ANNA REBECCA VIRGINIA
+ }
} {
test "SRANDMEMBER with <count> - $type" {
create_set myset $contents
+ assert_encoding $type myset
unset -nocomplain myset
array set myset {}
foreach ele [r smembers myset] {
@@ -767,16 +878,22 @@ start_server {
}
foreach {type contents} {
- hashtable {
+ listpack {
1 5 10 50 125
MARY PATRICIA LINDA BARBARA ELIZABETH
}
intset {
0 1 2 3 4 5 6 7 8 9
}
+ hashtable {
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+ 1 5 10 50 125
+ MARY PATRICIA LINDA BARBARA
+ }
} {
test "SRANDMEMBER histogram distribution - $type" {
create_set myset $contents
+ assert_encoding $type myset
unset -nocomplain myset
array set myset {}
foreach ele [r smembers myset] {
@@ -809,7 +926,7 @@ start_server {
r del myset3{t} myset4{t}
create_set myset1{t} {1 a b}
create_set myset2{t} {2 3 4}
- assert_encoding hashtable myset1{t}
+ assert_encoding listpack myset1{t}
assert_encoding intset myset2{t}
}
@@ -819,7 +936,7 @@ start_server {
assert_equal 1 [r smove myset1{t} myset2{t} a]
assert_equal {1 b} [lsort [r smembers myset1{t}]]
assert_equal {2 3 4 a} [lsort [r smembers myset2{t}]]
- assert_encoding hashtable myset2{t}
+ assert_encoding listpack myset2{t}
# move an integer element should not convert the encoding
setup_move
@@ -855,7 +972,7 @@ start_server {
assert_equal 1 [r smove myset1{t} myset3{t} a]
assert_equal {1 b} [lsort [r smembers myset1{t}]]
assert_equal {a} [lsort [r smembers myset3{t}]]
- assert_encoding hashtable myset3{t}
+ assert_encoding listpack myset3{t}
}
test "SMOVE from intset to non existing destination set" {