summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--redis.conf9
-rw-r--r--src/aof.c67
-rw-r--r--src/config.c2
-rw-r--r--src/db.c6
-rw-r--r--src/defrag.c10
-rw-r--r--src/intset.c13
-rw-r--r--src/intset.h2
-rw-r--r--src/listpack.c260
-rw-r--r--src/listpack.h4
-rw-r--r--src/object.c10
-rw-r--r--src/rdb.c87
-rw-r--r--src/rdb.h5
-rw-r--r--src/redis-check-rdb.c3
-rw-r--r--src/server.h8
-rw-r--r--src/t_set.c727
-rw-r--r--src/t_zset.c26
-rw-r--r--tests/unit/keyspace.tcl45
-rw-r--r--tests/unit/scan.tcl7
-rw-r--r--tests/unit/type/set.tcl185
19 files changed, 1132 insertions, 344 deletions
diff --git a/redis.conf b/redis.conf
index dfc9de308..408d0c258 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1951,13 +1951,20 @@ list-max-listpack-size -2
# etc.
list-compress-depth 0
-# Sets have a special encoding in just one case: when a set is composed
+# Sets have a special encoding when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512
+# Sets containing non-integer values are also encoded using a memory efficient
+# data structure when they have a small number of entries, and the biggest entry
+# does not exceed a given threshold. These thresholds can be configured using
+# the following directives.
+set-max-listpack-entries 128
+set-max-listpack-value 64
+
# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
diff --git a/src/aof.c b/src/aof.c
index 52026ab24..f1bf9a1a6 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
* The function returns 0 on error, 1 on success. */
int rewriteSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = setTypeSize(o);
-
- if (o->encoding == OBJ_ENCODING_INTSET) {
- int ii = 0;
- int64_t llval;
-
- while(intsetGet(o->ptr,ii++,&llval)) {
- if (count == 0) {
- int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
- AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
- !rioWriteBulkString(r,"SADD",4) ||
- !rioWriteBulkObject(r,key))
- {
- return 0;
- }
+ setTypeIterator *si = setTypeInitIterator(o);
+ char *str;
+ size_t len;
+ int64_t llval;
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (count == 0) {
+ int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
+ AOF_REWRITE_ITEMS_PER_CMD : items;
+ if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
+ !rioWriteBulkString(r,"SADD",4) ||
+ !rioWriteBulkObject(r,key))
+ {
+ return 0;
}
- if (!rioWriteBulkLongLong(r,llval)) return 0;
- if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
}
- } else if (o->encoding == OBJ_ENCODING_HT) {
- dictIterator *di = dictGetIterator(o->ptr);
- dictEntry *de;
-
- while((de = dictNext(di)) != NULL) {
- sds ele = dictGetKey(de);
- if (count == 0) {
- int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
- AOF_REWRITE_ITEMS_PER_CMD : items;
-
- if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
- !rioWriteBulkString(r,"SADD",4) ||
- !rioWriteBulkObject(r,key))
- {
- dictReleaseIterator(di);
- return 0;
- }
- }
- if (!rioWriteBulkString(r,ele,sdslen(ele))) {
- dictReleaseIterator(di);
- return 0;
- }
- if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
- items--;
+ size_t written = str ?
+ rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
+ if (!written) {
+ setTypeReleaseIterator(si);
+ return 0;
}
- dictReleaseIterator(di);
- } else {
- serverPanic("Unknown set encoding");
+ if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
+ items--;
}
+ setTypeReleaseIterator(si);
return 1;
}
diff --git a/src/config.c b/src/config.c
index b11e4dda4..c5eab41a9 100644
--- a/src/config.c
+++ b/src/config.c
@@ -3130,6 +3130,8 @@ standardConfig static_configs[] = {
/* Size_t configs */
createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
+ createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
diff --git a/src/db.c b/src/db.c
index a560ce052..5ebd21e4b 100644
--- a/src/db.c
+++ b/src/db.c
@@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
} while (cursor &&
maxiterations-- &&
listLength(keys) < (unsigned long)count);
- } else if (o->type == OBJ_SET) {
+ } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) {
int pos = 0;
int64_t ll;
while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
- } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
+ } else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) &&
+ o->encoding == OBJ_ENCODING_LISTPACK)
+ {
unsigned char *p = lpFirst(o->ptr);
unsigned char *vstr;
int64_t vlen;
diff --git a/src/defrag.c b/src/defrag.c
index ced4fd20a..e78c07929 100644
--- a/src/defrag.c
+++ b/src/defrag.c
@@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) {
} else if (ob->type == OBJ_SET) {
if (ob->encoding == OBJ_ENCODING_HT) {
defragged += defragSet(db, de);
- } else if (ob->encoding == OBJ_ENCODING_INTSET) {
- intset *newis, *is = ob->ptr;
- if ((newis = activeDefragAlloc(is)))
- defragged++, ob->ptr = newis;
+ } else if (ob->encoding == OBJ_ENCODING_INTSET ||
+ ob->encoding == OBJ_ENCODING_LISTPACK)
+ {
+ void *newptr, *ptr = ob->ptr;
+ if ((newptr = activeDefragAlloc(ptr)))
+ defragged++, ob->ptr = newptr;
} else {
serverPanic("Unknown set encoding");
}
diff --git a/src/intset.c b/src/intset.c
index e96037da8..621a74283 100644
--- a/src/intset.c
+++ b/src/intset.c
@@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%len);
}
+/* Return the largest member. */
+int64_t intsetMax(intset *is) {
+ uint32_t len = intrev32ifbe(is->length);
+ return _intsetGet(is, len - 1);
+}
+
+/* Return the smallest member. */
+int64_t intsetMin(intset *is) {
+ return _intsetGet(is, 0);
+}
+
/* Get the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
@@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) {
is = intsetAdd(is,6,&success); assert(success);
is = intsetAdd(is,4,&success); assert(success);
is = intsetAdd(is,4,&success); assert(!success);
+ assert(6 == intsetMax(is));
+ assert(4 == intsetMin(is));
ok();
zfree(is);
}
diff --git a/src/intset.h b/src/intset.h
index 772f847b6..41cc7b822 100644
--- a/src/intset.h
+++ b/src/intset.h
@@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
+int64_t intsetMax(intset *is);
+int64_t intsetMin(intset *is);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(const intset *is);
size_t intsetBlobLen(intset *is);
diff --git a/src/listpack.c b/src/listpack.c
index db9a1b4ab..bfa364f98 100644
--- a/src/listpack.c
+++ b/src/listpack.c
@@ -1063,6 +1063,55 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) {
return lp;
}
+/* Delete the elements 'ps' passed as an array of 'count' element pointers and
+ * return the resulting listpack. The elements must be given in the same order
+ * as they apper in the listpack. */
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count) {
+ if (count == 0) return lp;
+ unsigned char *dst = ps[0];
+ size_t total_bytes = lpGetTotalBytes(lp);
+ unsigned char *lp_end = lp + total_bytes; /* After the EOF element. */
+ assert(lp_end[-1] == LP_EOF);
+ /*
+ * ----+--------+-----------+--------+---------+-----+---+
+ * ... | Delete | Keep | Delete | Keep | ... |EOF|
+ * ... |xxxxxxxx| |xxxxxxxx| | ... | |
+ * ----+--------+-----------+--------+---------+-----+---+
+ * ^ ^ ^ ^
+ * | | | |
+ * ps[i] | ps[i+1] |
+ * skip keep_start keep_end lp_end
+ *
+ * The loop memmoves the bytes between keep_start and keep_end to dst.
+ */
+ for (unsigned long i = 0; i < count; i++) {
+ unsigned char *skip = ps[i];
+ assert(skip != NULL && skip[0] != LP_EOF);
+ unsigned char *keep_start = lpSkip(skip);
+ unsigned char *keep_end;
+ if (i + 1 < count) {
+ keep_end = ps[i + 1];
+ /* Deleting consecutive elements. Nothing to keep between them. */
+ if (keep_start == keep_end) continue;
+ } else {
+ /* Keep the rest of the listpack including the EOF marker. */
+ keep_end = lp_end;
+ }
+ assert(keep_end > keep_start);
+ size_t bytes_to_keep = keep_end - keep_start;
+ memmove(dst, keep_start, bytes_to_keep);
+ dst += bytes_to_keep;
+ }
+ /* Update total size and num elements. */
+ size_t deleted_bytes = lp_end - dst;
+ total_bytes -= deleted_bytes;
+ assert(lp[total_bytes - 1] == LP_EOF);
+ lpSetTotalBytes(lp, total_bytes);
+ uint32_t numele = lpGetNumElements(lp);
+ if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - count);
+ return lpShrinkToFit(lp);
+}
+
/* Merge listpacks 'first' and 'second' by appending 'second' to 'first'.
*
* NOTE: The larger listpack is reallocated to contain the new merged listpack.
@@ -1383,6 +1432,43 @@ void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *k
val->sval = lpGetValue(p, &(val->slen), &(val->lval));
}
+/* Randomly select 'count' entries and store them in the 'entries' array, which
+ * needs to have space for 'count' listpackEntry structs. The order is random
+ * and duplicates are possible. */
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries) {
+ struct pick {
+ unsigned int index;
+ unsigned int order;
+ } *picks = lp_malloc(count * sizeof(struct pick));
+ unsigned int total_size = lpLength(lp);
+ assert(total_size);
+ for (unsigned int i = 0; i < count; i++) {
+ picks[i].index = rand() % total_size;
+ picks[i].order = i;
+ }
+
+ /* Sort by index. */
+ qsort(picks, count, sizeof(struct pick), uintCompare);
+
+ /* Iterate over listpack in index order and store the values in the entries
+ * array respecting the original order. */
+ unsigned char *p = lpFirst(lp);
+ unsigned int j = 0; /* index in listpack */
+ for (unsigned int i = 0; i < count; i++) {
+ /* Advance listpack pointer to until we reach 'index' listpack. */
+ while (j < picks[i].index) {
+ p = lpNext(lp, p);
+ j++;
+ }
+ int storeorder = picks[i].order;
+ unsigned int len = 0;
+ long long llval = 0;
+ unsigned char *str = lpGetValue(p, &len, &llval);
+ lpSaveValue(str, len, llval, &entries[storeorder]);
+ }
+ lp_free(picks);
+}
+
/* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The order of the picked entries is random, and the selections
* are non-unique (repetitions are possible).
@@ -1449,34 +1535,83 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack
if (count > total_size)
count = total_size;
- /* To only iterate once, every time we try to pick a member, the probability
- * we pick it is the quotient of the count left we want to pick and the
- * count still we haven't visited in the dict, this way, we could make every
- * member be equally picked.*/
p = lpFirst(lp);
unsigned int picked = 0, remaining = count;
while (picked < count && p) {
- double randomDouble = ((double)rand()) / RAND_MAX;
- double threshold = ((double)remaining) / (total_size - index);
- if (randomDouble <= threshold) {
+ assert((p = lpNextRandom(lp, p, &index, remaining, 1)));
+ key = lpGetValue(p, &klen, &klval);
+ lpSaveValue(key, klen, klval, &keys[picked]);
+ assert((p = lpNext(lp, p)));
+ index++;
+ if (vals) {
key = lpGetValue(p, &klen, &klval);
- lpSaveValue(key, klen, klval, &keys[picked]);
- assert((p = lpNext(lp, p)));
- if (vals) {
- key = lpGetValue(p, &klen, &klval);
- lpSaveValue(key, klen, klval, &vals[picked]);
- }
- remaining--;
- picked++;
- } else {
- assert((p = lpNext(lp, p)));
+ lpSaveValue(key, klen, klval, &vals[picked]);
}
p = lpNext(lp, p);
+ remaining--;
+ picked++;
index++;
}
return picked;
}
+/* Iterates forward to the "next random" element, given we are yet to pick
+ * 'remaining' unique elements between the starting element 'p' (inclusive) and
+ * the end of the list. The 'index' needs to be initialized according to the
+ * current zero-based index matching the position of the starting element 'p'
+ * and is updated to match the returned element's zero-based index. If
+ * 'even_only' is nonzero, an element with an even index is picked, which is
+ * useful if the listpack represents a key-value pair sequence.
+ *
+ * Note that this function can return p. In order to skip the previously
+ * returned element, you need to call lpNext() or lpDelete() after each call to
+ * lpNextRandom(). Idea:
+ *
+ * assert(remaining <= lpLength(lp));
+ * p = lpFirst(lp);
+ * i = 0;
+ * while (remaining > 0) {
+ * p = lpNextRandom(lp, p, &i, remaining--, 0);
+ *
+ * // ... Do stuff with p ...
+ *
+ * p = lpNext(lp, p);
+ * i++;
+ * }
+ */
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+ unsigned int remaining, int even_only)
+{
+ /* To only iterate once, every time we try to pick a member, the probability
+ * we pick it is the quotient of the count left we want to pick and the
+ * count still we haven't visited. This way, we could make every member be
+ * equally likely to be picked. */
+ unsigned int i = *index;
+ unsigned int total_size = lpLength(lp);
+ while (i < total_size && p != NULL) {
+ if (even_only && i % 2 != 0) {
+ p = lpNext(lp, p);
+ i++;
+ continue;
+ }
+
+ /* Do we pick this element? */
+ unsigned int available = total_size - i;
+ if (even_only) available /= 2;
+ double randomDouble = ((double)rand()) / RAND_MAX;
+ double threshold = ((double)remaining) / available;
+ if (randomDouble <= threshold) {
+ *index = i;
+ return p;
+ }
+
+ p = lpNext(lp, p);
+ i++;
+ }
+
+ return NULL;
+}
+
/* Print info of listpack which is used in debugCommand */
void lpRepr(unsigned char *lp) {
unsigned char *p, *vstr;
@@ -1902,6 +2037,21 @@ int listpackTest(int argc, char *argv[], int flags) {
zfree(lp);
}
+ TEST("Batch delete") {
+ unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */
+ assert(lpLength(lp) == 4); /* Pre-condition */
+ unsigned char *p0 = lpFirst(lp),
+ *p1 = lpNext(lp, p0),
+ *p2 = lpNext(lp, p1),
+ *p3 = lpNext(lp, p2);
+ unsigned char *ps[] = {p0, p1, p3};
+ lp = lpBatchDelete(lp, ps, 3);
+ assert(lpLength(lp) == 1);
+ verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2]));
+ assert(lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL) == 1);
+ lpFree(lp);
+ }
+
TEST("Delete foo while iterating") {
lp = createList();
p = lpFirst(lp);
@@ -2048,6 +2198,82 @@ int listpackTest(int argc, char *argv[], int flags) {
zfree(lp3);
}
+ TEST("lpNextRandom normal usage") {
+ /* Create some data */
+ unsigned char *lp = lpNew(0);
+ unsigned char buf[100] = "asdf";
+ unsigned int size = 100;
+ for (size_t i = 0; i < size; i++) {
+ lp = lpAppend(lp, buf, i);
+ }
+ assert(lpLength(lp) == size);
+
+ /* Pick a subset of the elements of every possible subset size */
+ for (unsigned int count = 0; count <= size; count++) {
+ unsigned int remaining = count;
+ unsigned char *p = lpFirst(lp);
+ unsigned char *prev = NULL;
+ unsigned index = 0;
+ while (remaining > 0) {
+ assert(p != NULL);
+ p = lpNextRandom(lp, p, &index, remaining--, 0);
+ assert(p != NULL);
+ assert(p != prev);
+ prev = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ }
+ }
+
+ TEST("lpNextRandom corner cases") {
+ unsigned char *lp = lpNew(0);
+ unsigned i = 0;
+
+ /* Pick from empty listpack returns NULL. */
+ assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL);
+
+ /* Add some elements and find their pointers within the listpack. */
+ lp = lpAppend(lp, (unsigned char *)"abc", 3);
+ lp = lpAppend(lp, (unsigned char *)"def", 3);
+ lp = lpAppend(lp, (unsigned char *)"ghi", 3);
+ assert(lpLength(lp) == 3);
+ unsigned char *p0 = lpFirst(lp);
+ unsigned char *p1 = lpNext(lp, p0);
+ unsigned char *p2 = lpNext(lp, p1);
+ assert(lpNext(lp, p2) == NULL);
+
+ /* Pick zero elements returns NULL. */
+ i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL);
+
+ /* Pick all returns all. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1);
+ i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2);
+
+ /* Pick more than one when there's only one left returns the last one. */
+ i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2);
+
+ /* Pick all even elements returns p0 and p2. */
+ i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0);
+ i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2);
+
+ /* Don't crash even for bad index. */
+ for (int j = 0; j < 100; j++) {
+ unsigned char *p;
+ switch (j % 4) {
+ case 0: p = p0; break;
+ case 1: p = p1; break;
+ case 2: p = p2; break;
+ case 3: p = NULL; break;
+ }
+ i = j % 7;
+ unsigned int remaining = j % 5;
+ p = lpNextRandom(lp, p, &i, remaining, 0);
+ assert(p == p0 || p == p1 || p == p2 || p == NULL);
+ }
+ }
+
TEST("Random pair with one element") {
listpackEntry key, val;
unsigned char *lp = lpNew(0);
diff --git a/src/listpack.h b/src/listpack.h
index 3e750af5b..ce27ea367 100644
--- a/src/listpack.h
+++ b/src/listpack.h
@@ -70,6 +70,7 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long
unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp);
unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num);
unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num);
+unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count);
unsigned char *lpMerge(unsigned char **first, unsigned char **second);
unsigned long lpLength(unsigned char *lp);
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf);
@@ -90,6 +91,9 @@ unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen);
void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val);
void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals);
+void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries);
+unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index,
+ unsigned int remaining, int even_only);
int lpSafeToAdd(unsigned char* lp, size_t add);
void lpRepr(unsigned char *lp);
diff --git a/src/object.c b/src/object.c
index 44fd5befd..d48af4128 100644
--- a/src/object.c
+++ b/src/object.c
@@ -247,6 +247,13 @@ robj *createIntsetObject(void) {
return o;
}
+robj *createSetListpackObject(void) {
+ unsigned char *lp = lpNew(0);
+ robj *o = createObject(OBJ_SET, lp);
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ return o;
+}
+
robj *createHashObject(void) {
unsigned char *zl = lpNew(0);
robj *o = createObject(OBJ_HASH, zl);
@@ -306,6 +313,7 @@ void freeSetObject(robj *o) {
dictRelease((dict*) o->ptr);
break;
case OBJ_ENCODING_INTSET:
+ case OBJ_ENCODING_LISTPACK:
zfree(o->ptr);
break;
default:
@@ -441,6 +449,8 @@ void dismissSetObject(robj *o, size_t size_hint) {
dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*));
} else if (o->encoding == OBJ_ENCODING_INTSET) {
dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr));
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
} else {
serverPanic("Unknown set encoding type");
}
diff --git a/src/rdb.c b/src/rdb.c
index 083e5cf89..bbb61aafb 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -665,6 +665,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
return rdbSaveType(rdb,RDB_TYPE_SET_INTSET);
else if (o->encoding == OBJ_ENCODING_HT)
return rdbSaveType(rdb,RDB_TYPE_SET);
+ else if (o->encoding == OBJ_ENCODING_LISTPACK)
+ return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK);
else
serverPanic("Unknown set encoding");
case OBJ_ZSET:
@@ -858,6 +860,10 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1;
nwritten += n;
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ size_t l = lpBytes((unsigned char *)o->ptr);
+ if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1;
+ nwritten += n;
} else {
serverPanic("Unknown set encoding");
}
@@ -1690,19 +1696,21 @@ static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int he
}
/* callback for to check the listpack doesn't have duplicate records */
-static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
+static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) {
struct {
+ int pairs;
long count;
dict *fields;
} *data = userdata;
if (data->fields == NULL) {
data->fields = dictCreate(&hashDictType);
- dictExpand(data->fields, head_count/2);
+ dictExpand(data->fields, data->pairs ? head_count/2 : head_count);
}
- /* Even records are field names, add to dict and check that's not a dup */
- if (((data->count) & 1) == 0) {
+ /* If we're checking pairs, then even records are field names. Otherwise
+ * we're checking all elements. Add to dict and check that's not a dup */
+ if (!data->pairs || ((data->count) & 1) == 0) {
unsigned char *str;
int64_t slen;
unsigned char buf[LP_INTBUF_SIZE];
@@ -1722,21 +1730,24 @@ static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, vo
/* Validate the integrity of the listpack structure.
* when `deep` is 0, only the integrity of the header is validated.
- * when `deep` is 1, we scan all the entries one by one. */
-int lpPairsValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep) {
+ * when `deep` is 1, we scan all the entries one by one.
+ * when `pairs` is 0, all elements need to be unique (it's a set)
+ * when `pairs` is 1, odd elements need to be unique (it's a key-value map) */
+int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) {
if (!deep)
return lpValidateIntegrity(lp, size, 0, NULL, NULL);
/* Keep track of the field names to locate duplicate ones */
struct {
+ int pairs;
long count;
dict *fields; /* Initialisation at the first callback. */
- } data = {0, NULL};
+ } data = {pairs, 0, NULL};
- int ret = lpValidateIntegrity(lp, size, 1, _lpPairsEntryValidation, &data);
+ int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data);
/* make sure we have an even number of records. */
- if (data.count & 1)
+ if (pairs && data.count & 1)
ret = 0;
if (data.fields) dictRelease(data.fields);
@@ -1813,6 +1824,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
/* Load every single element of the set */
+ size_t maxelelen = 0, sumelelen = 0;
for (i = 0; i < len; i++) {
long long llval;
sds sdsele;
@@ -1821,6 +1833,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
decrRefCount(o);
return NULL;
}
+ size_t elelen = sdslen(sdsele);
+ sumelelen += elelen;
+ if (elelen > maxelelen) maxelelen = elelen;
if (o->encoding == OBJ_ENCODING_INTSET) {
/* Fetch integer value from element. */
@@ -1833,6 +1848,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
sdsfree(sdsele);
return NULL;
}
+ } else if (setTypeSize(o) < server.set_max_listpack_entries &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, sumelelen))
+ {
+ /* We checked if it's safe to add one large element instead
+ * of many small ones. It's OK since lpSafeToAdd doesn't
+ * care about individual elements, only the total size. */
+ setTypeConvert(o, OBJ_ENCODING_LISTPACK);
} else {
setTypeConvert(o,OBJ_ENCODING_HT);
if (dictTryExpand(o->ptr,len) != DICT_OK) {
@@ -1845,6 +1868,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
/* This will also be called when the set was just converted
+ * to a listpack encoded set. */
+ if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ if (setTypeSize(o) < server.set_max_listpack_entries &&
+ elelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(o->ptr, elelen))
+ {
+ unsigned char *p = lpFirst(o->ptr);
+ if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) {
+ rdbReportCorruptRDB("Duplicate set members detected");
+ decrRefCount(o);
+ sdsfree(sdsele);
+ return NULL;
+ }
+ o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
+ } else {
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ if (dictTryExpand(o->ptr, len) != DICT_OK) {
+ rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+ (unsigned long long)len);
+ sdsfree(sdsele);
+ decrRefCount(o);
+ return NULL;
+ }
+ }
+ }
+
+ /* This will also be called when the set was just converted
* to a regular hash table encoded set. */
if (o->encoding == OBJ_ENCODING_HT) {
if (dictAdd((dict*)o->ptr,sdsele,NULL) != DICT_OK) {
@@ -2126,6 +2176,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
} else if (rdbtype == RDB_TYPE_HASH_ZIPMAP ||
rdbtype == RDB_TYPE_LIST_ZIPLIST ||
rdbtype == RDB_TYPE_SET_INTSET ||
+ rdbtype == RDB_TYPE_SET_LISTPACK ||
rdbtype == RDB_TYPE_ZSET_ZIPLIST ||
rdbtype == RDB_TYPE_ZSET_LISTPACK ||
rdbtype == RDB_TYPE_HASH_ZIPLIST ||
@@ -2243,6 +2294,20 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
if (intsetLen(o->ptr) > server.set_max_intset_entries)
setTypeConvert(o,OBJ_ENCODING_HT);
break;
+ case RDB_TYPE_SET_LISTPACK:
+ if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) {
+ rdbReportCorruptRDB("Set listpack integrity check failed.");
+ zfree(encoded);
+ o->ptr = NULL;
+ decrRefCount(o);
+ return NULL;
+ }
+ o->type = OBJ_SET;
+ o->encoding = OBJ_ENCODING_LISTPACK;
+ if (setTypeSize(o) > server.set_max_listpack_entries)
+ setTypeConvert(o, OBJ_ENCODING_HT);
+ break;
case RDB_TYPE_ZSET_ZIPLIST:
{
unsigned char *lp = lpNew(encoded_len);
@@ -2272,7 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
case RDB_TYPE_ZSET_LISTPACK:
if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
- if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
rdbReportCorruptRDB("Zset listpack integrity check failed.");
zfree(encoded);
o->ptr = NULL;
@@ -2318,7 +2383,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
}
case RDB_TYPE_HASH_LISTPACK:
if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
- if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) {
+ if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) {
rdbReportCorruptRDB("Hash listpack integrity check failed.");
zfree(encoded);
o->ptr = NULL;
diff --git a/src/rdb.h b/src/rdb.h
index d215334db..4350d3a28 100644
--- a/src/rdb.h
+++ b/src/rdb.h
@@ -38,7 +38,7 @@
/* The current RDB version. When the format changes in a way that is no longer
* backward compatible this number gets incremented. */
-#define RDB_VERSION 10
+#define RDB_VERSION 11
/* Defines related to the dump file format. To store 32 bits lengths for short
* keys requires a lot of space, so we check the most significant 2 bits of
@@ -95,10 +95,11 @@
#define RDB_TYPE_ZSET_LISTPACK 17
#define RDB_TYPE_LIST_QUICKLIST_2 18
#define RDB_TYPE_STREAM_LISTPACKS_2 19
+#define RDB_TYPE_SET_LISTPACK 20
/* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */
/* Test if a type is an object type. */
-#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 19))
+#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 20))
/* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */
#define RDB_OPCODE_FUNCTION2 245 /* function library data */
diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c
index cedfd84cb..a537c6dae 100644
--- a/src/redis-check-rdb.c
+++ b/src/redis-check-rdb.c
@@ -97,7 +97,8 @@ char *rdb_type_string[] = {
"stream",
"hash-listpack",
"zset-listpack",
- "quicklist-v2"
+ "quicklist-v2",
+ "set-listpack",
};
/* Show a few stats collected into 'rdbstate' */
diff --git a/src/server.h b/src/server.h
index 4dfaaea7b..c96c1c743 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1850,6 +1850,8 @@ struct redisServer {
size_t hash_max_listpack_entries;
size_t hash_max_listpack_value;
size_t set_max_intset_entries;
+ size_t set_max_listpack_entries;
+ size_t set_max_listpack_value;
size_t zset_max_listpack_entries;
size_t zset_max_listpack_value;
size_t hll_sparse_max_bytes;
@@ -2331,6 +2333,7 @@ typedef struct {
int encoding;
int ii; /* intset iterator */
dictIterator *di;
+ unsigned char *lpi; /* listpack iterator */
} setTypeIterator;
/* Structure to hold hash iteration abstraction. Note that iteration over
@@ -2655,6 +2658,7 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
robj *createQuicklistObject(void);
robj *createSetObject(void);
robj *createIntsetObject(void);
+robj *createSetListpackObject(void);
robj *createHashObject(void);
robj *createZsetObject(void);
robj *createZsetListpackObject(void);
@@ -2980,9 +2984,9 @@ int setTypeRemove(robj *subject, sds value);
int setTypeIsMember(robj *subject, sds value);
setTypeIterator *setTypeInitIterator(robj *subject);
void setTypeReleaseIterator(setTypeIterator *si);
-int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele);
sds setTypeNextObject(setTypeIterator *si);
-int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
unsigned long setTypeSize(const robj *subject);
void setTypeConvert(robj *subject, int enc);
diff --git a/src/t_set.c b/src/t_set.c
index 8a669fe7c..8ab602791 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -29,6 +29,12 @@
#include "server.h"
+/* Internal prototypes */
+
+int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeRemoveAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
+
/*-----------------------------------------------------------------------------
* Set Commands
*----------------------------------------------------------------------------*/
@@ -42,45 +48,154 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj *setTypeCreate(sds value) {
if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
return createIntsetObject();
- return createSetObject();
+ return createSetListpackObject();
+}
+
+/* Return the maximum number of entries to store in an intset. */
+static size_t intsetMaxEntries(void) {
+ size_t max_entries = server.set_max_intset_entries;
+ /* limit to 1G entries due to intset internals. */
+ if (max_entries >= 1<<30) max_entries = 1<<30;
+ return max_entries;
}
-/* Add the specified value into a set.
+/* Converts intset to HT if it contains too many entries. */
+static void maybeConvertIntset(robj *subject) {
+ serverAssert(subject->encoding == OBJ_ENCODING_INTSET);
+ if (intsetLen(subject->ptr) > intsetMaxEntries())
+ setTypeConvert(subject,OBJ_ENCODING_HT);
+}
+
+/* When you know all set elements are integers, call this to convert the set to
+ * an intset. No conversion happens if the set contains too many entries for an
+ * intset. */
+static void maybeConvertToIntset(robj *set) {
+ if (set->encoding == OBJ_ENCODING_INTSET) return; /* already intset */
+ if (setTypeSize(set) > intsetMaxEntries()) return; /* can't use intset */
+ intset *is = intsetNew();
+ char *str;
+ size_t len;
+ int64_t llval;
+ setTypeIterator *si = setTypeInitIterator(set);
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (str) {
+ /* If the element is returned as a string, we may be able to convert
+ * it to integer. This happens for OBJ_ENCODING_HT. */
+ serverAssert(string2ll(str, len, (long long *)&llval));
+ }
+ uint8_t success = 0;
+ is = intsetAdd(is, llval, &success);
+ serverAssert(success);
+ }
+ setTypeReleaseIterator(si);
+ freeSetObject(set); /* frees the internals but not robj itself */
+ set->ptr = is;
+ set->encoding = OBJ_ENCODING_INTSET;
+}
+
+/* Add the specified sds value into a set.
*
* If the value was already member of the set, nothing is done and 0 is
* returned, otherwise the new element is added and 1 is returned. */
int setTypeAdd(robj *subject, sds value) {
- long long llval;
- if (subject->encoding == OBJ_ENCODING_HT) {
- dict *ht = subject->ptr;
- dictEntry *de = dictAddRaw(ht,value,NULL);
- if (de) {
- dictSetKey(ht,de,sdsdup(value));
+ return setTypeAddAux(subject, value, sdslen(value), 0, 1);
+}
+
+/* Add member. This function is optimized for the different encodings. The
+ * value can be provided as an sds string (indicated by passing str_is_sds =
+ * 1), as string and length (str_is_sds = 0) or as an integer in which case str
+ * is set to NULL and llval is provided instead.
+ *
+ * Returns 1 if the value was added and 0 if it was already a member. */
+int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (set->encoding == OBJ_ENCODING_INTSET) {
+ uint8_t success = 0;
+ set->ptr = intsetAdd(set->ptr, llval, &success);
+ if (success) maybeConvertIntset(set);
+ return success;
+ }
+ /* Convert int to string. */
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ serverAssert(str);
+ if (set->encoding == OBJ_ENCODING_HT) {
+ /* Avoid duping the string if it is an sds string. */
+ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
+ dict *ht = set->ptr;
+ dictEntry *de = dictAddRaw(ht,sdsval,NULL);
+ if (de && sdsval == str) {
+ /* String was added but we don't own this sds string. Dup it and
+ * replace it in the dict entry. */
+ dictSetKey(ht,de,sdsdup((sds)str));
dictSetVal(ht,de,NULL);
+ } else if (!de && sdsval != str) {
+ /* String was already a member. Free our temporary sds copy. */
+ sdsfree(sdsval);
+ }
+ return (de != NULL);
+ } else if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ if (p != NULL)
+ p = lpFind(lp, p, (unsigned char*)str, len, 0);
+ if (p == NULL) {
+ /* Not found. */
+ if (lpLength(lp) < server.set_max_listpack_entries &&
+ len <= server.set_max_listpack_value &&
+ lpSafeToAdd(lp, len))
+ {
+ if (str == tmpbuf) {
+ /* This came in as integer so we can avoid parsing it again.
+ * TODO: Create and use lpFindInteger; don't go via string. */
+ lp = lpAppendInteger(lp, llval);
+ } else {
+ lp = lpAppend(lp, (unsigned char*)str, len);
+ }
+ set->ptr = lp;
+ } else {
+ /* Size limit is reached. Convert to hashtable and add. */
+ setTypeConvert(set, OBJ_ENCODING_HT);
+ serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
+ }
return 1;
}
- } else if (subject->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
+ } else if (set->encoding == OBJ_ENCODING_INTSET) {
+ long long value;
+ if (string2ll(str, len, &value)) {
uint8_t success = 0;
- subject->ptr = intsetAdd(subject->ptr,llval,&success);
+ set->ptr = intsetAdd(set->ptr,value,&success);
if (success) {
- /* Convert to regular set when the intset contains
- * too many entries. */
- size_t max_entries = server.set_max_intset_entries;
- /* limit to 1G entries due to intset internals. */
- if (max_entries >= 1<<30) max_entries = 1<<30;
- if (intsetLen(subject->ptr) > max_entries)
- setTypeConvert(subject,OBJ_ENCODING_HT);
+ maybeConvertIntset(set);
return 1;
}
} else {
- /* Failed to get integer from object, convert to regular set. */
- setTypeConvert(subject,OBJ_ENCODING_HT);
-
- /* The set *was* an intset and this value is not integer
- * encodable, so dictAdd should always work. */
- serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
- return 1;
+ size_t maxelelen = intsetLen(set->ptr) == 0 ?
+ 0 : max(sdigits10(intsetMax(set->ptr)),
+ sdigits10(intsetMin(set->ptr)));
+ if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries &&
+ len <= server.set_max_listpack_value &&
+ maxelelen <= server.set_max_listpack_value &&
+ lpSafeToAdd(NULL, maxelelen * intsetLen(set->ptr) + len))
+ {
+ /* In the "safe to add" check above we assumed all elements in
+ * the intset are of size maxelelen. This is an upper bound. */
+ setTypeConvert(set, OBJ_ENCODING_LISTPACK);
+ unsigned char *lp = set->ptr;
+ lp = lpAppend(lp, (unsigned char *)str, len);
+ set->ptr = lp;
+ return 1;
+ } else {
+ setTypeConvert(set, OBJ_ENCODING_HT);
+ /* The set *was* an intset and this value is not integer
+ * encodable, so dictAdd should always work. */
+ serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
+ return 1;
+ }
}
} else {
serverPanic("Unknown set encoding");
@@ -88,15 +203,50 @@ int setTypeAdd(robj *subject, sds value) {
return 0;
}
+/* Deletes a value provided as an sds string from the set. Returns 1 if the
+ * value was deleted and 0 if it was not a member of the set. */
int setTypeRemove(robj *setobj, sds value) {
- long long llval;
+ return setTypeRemoveAux(setobj, value, sdslen(value), 0, 1);
+}
+
+/* Remove a member. This function is optimized for the different encodings. The
+ * value can be provided as an sds string (indicated by passing str_is_sds =
+ * 1), as string and length (str_is_sds = 0) or as an integer in which case str
+ * is set to NULL and llval is provided instead.
+ *
+ * Returns 1 if the value was deleted and 0 if it was not a member of the set. */
+int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (setobj->encoding == OBJ_ENCODING_INTSET) {
+ int success;
+ setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
+ return success;
+ }
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
if (setobj->encoding == OBJ_ENCODING_HT) {
- if (dictDelete(setobj->ptr,value) == DICT_OK) {
- if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
+ sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
+ int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
+ if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
+ if (sdsval != str) sdsfree(sdsval); /* free temp copy */
+ return deleted;
+ } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = setobj->ptr;
+ unsigned char *p = lpFirst(lp);
+ if (p == NULL) return 0;
+ p = lpFind(lp, p, (unsigned char*)str, len, 0);
+ if (p != NULL) {
+ lp = lpDelete(lp, p, NULL);
+ setobj->ptr = lp;
return 1;
}
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
+ long long llval;
+ if (string2ll(str, len, &llval)) {
int success;
setobj->ptr = intsetRemove(setobj->ptr,llval,&success);
if (success) return 1;
@@ -107,18 +257,45 @@ int setTypeRemove(robj *setobj, sds value) {
return 0;
}
+/* Check if an sds string is a member of the set. Returns 1 if the value is a
+ * member of the set and 0 if it isn't. */
int setTypeIsMember(robj *subject, sds value) {
- long long llval;
- if (subject->encoding == OBJ_ENCODING_HT) {
- return dictFind((dict*)subject->ptr,value) != NULL;
- } else if (subject->encoding == OBJ_ENCODING_INTSET) {
- if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
- return intsetFind((intset*)subject->ptr,llval);
- }
+ return setTypeIsMemberAux(subject, value, sdslen(value), 0, 1);
+}
+
+/* Membership checking optimized for the different encodings. The value can be
+ * provided as an sds string (indicated by passing str_is_sds = 1), as string
+ * and length (str_is_sds = 0) or as an integer in which case str is set to NULL
+ * and llval is provided instead.
+ *
+ * Returns 1 if the value is a member of the set and 0 if it isn't. */
+int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) {
+ char tmpbuf[LONG_STR_SIZE];
+ if (!str) {
+ if (set->encoding == OBJ_ENCODING_INTSET)
+ return intsetFind(set->ptr, llval);
+ len = ll2string(tmpbuf, sizeof tmpbuf, llval);
+ str = tmpbuf;
+ str_is_sds = 0;
+ }
+
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ return p && lpFind(lp, p, (unsigned char*)str, len, 0);
+ } else if (set->encoding == OBJ_ENCODING_INTSET) {
+ long long llval;
+ return string2ll(str, len, &llval) && intsetFind(set->ptr, llval);
+ } else if (set->encoding == OBJ_ENCODING_HT && str_is_sds) {
+ return dictFind(set->ptr, (sds)str) != NULL;
+ } else if (set->encoding == OBJ_ENCODING_HT) {
+ sds sdsval = sdsnewlen(str, len);
+ int result = dictFind(set->ptr, sdsval) != NULL;
+ sdsfree(sdsval);
+ return result;
} else {
serverPanic("Unknown set encoding");
}
- return 0;
}
setTypeIterator *setTypeInitIterator(robj *subject) {
@@ -129,6 +306,8 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
si->di = dictGetIterator(subject->ptr);
} else if (si->encoding == OBJ_ENCODING_INTSET) {
si->ii = 0;
+ } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
+ si->lpi = NULL;
} else {
serverPanic("Unknown set encoding");
}
@@ -142,28 +321,50 @@ void setTypeReleaseIterator(setTypeIterator *si) {
}
/* Move to the next entry in the set. Returns the object at the current
- * position.
+ * position, as a string or as an integer.
*
- * Since set elements can be internally be stored as SDS strings or
+ * Since set elements can be internally be stored as SDS strings, char buffers or
* simple arrays of integers, setTypeNext returns the encoding of the
- * set object you are iterating, and will populate the appropriate pointer
- * (sdsele) or (llele) accordingly.
+ * set object you are iterating, and will populate the appropriate pointers
+ * (str and len) or (llele) depending on whether the value is stored as a string
+ * or as an integer internally.
+ *
+ * If OBJ_ENCODING_HT is returned, then str points to an sds string and can be
+ * used as such. If OBJ_ENCODING_INTSET, then llele is populated and str is
+ * pointed to NULL. If OBJ_ENCODING_LISTPACK is returned, the value can be
+ * either a string or an integer. If *str is not NULL, then str and len are
+ * populated with the string content and length. Otherwise, llele populated with
+ * an integer value.
*
- * Note that both the sdsele and llele pointers should be passed and cannot
+ * Note that str, len and llele pointers should all be passed and cannot
* be NULL since the function will try to defensively populate the non
* used field with values which are easy to trap if misused.
*
- * When there are no longer elements -1 is returned. */
-int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
+ * When there are no more elements -1 is returned. */
+int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
if (si->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictNext(si->di);
if (de == NULL) return -1;
- *sdsele = dictGetKey(de);
+ *str = dictGetKey(de);
+ *len = sdslen(*str);
*llele = -123456789; /* Not needed. Defensive. */
} else if (si->encoding == OBJ_ENCODING_INTSET) {
if (!intsetGet(si->subject->ptr,si->ii++,llele))
return -1;
- *sdsele = NULL; /* Not needed. Defensive. */
+ *str = NULL;
+ } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = si->subject->ptr;
+ unsigned char *lpi = si->lpi;
+ if (lpi == NULL) {
+ lpi = lpFirst(lp);
+ } else {
+ lpi = lpNext(lp, lpi);
+ }
+ if (lpi == NULL) return -1;
+ si->lpi = lpi;
+ unsigned int l;
+ *str = (char *)lpGetValue(lpi, &l, (long long *)llele);
+ *len = (size_t)l;
} else {
serverPanic("Wrong set encoding in setTypeNext");
}
@@ -179,54 +380,85 @@ int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
* an issue. */
sds setTypeNextObject(setTypeIterator *si) {
int64_t intele;
- sds sdsele;
- int encoding;
+ char *str;
+ size_t len;
- encoding = setTypeNext(si,&sdsele,&intele);
- switch(encoding) {
- case -1: return NULL;
- case OBJ_ENCODING_INTSET:
- return sdsfromlonglong(intele);
- case OBJ_ENCODING_HT:
- return sdsdup(sdsele);
- default:
- serverPanic("Unsupported encoding");
- }
- return NULL; /* just to suppress warnings */
+ if (setTypeNext(si, &str, &len, &intele) == -1) return NULL;
+ if (str != NULL) return sdsnewlen(str, len);
+ return sdsfromlonglong(intele);
}
/* Return random element from a non empty set.
* The returned element can be an int64_t value if the set is encoded
- * as an "intset" blob of integers, or an SDS string if the set
- * is a regular set.
+ * as an "intset" blob of integers, or an string.
*
- * The caller provides both pointers to be populated with the right
+ * The caller provides three pointers to be populated with the right
* object. The return value of the function is the object->encoding
- * field of the object and is used by the caller to check if the
- * int64_t pointer or the sds pointer was populated.
+ * field of the object and can be used by the caller to check if the
+ * int64_t pointer or the str and len pointers were populated, as for
+ * setTypeNext. If OBJ_ENCODING_HT is returned, str is pointed to a
+ * string which is actually an sds string and it can be used as such.
*
- * Note that both the sdsele and llele pointers should be passed and cannot
- * be NULL since the function will try to defensively populate the non
- * used field with values which are easy to trap if misused. */
-int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
+ * Note that both the str, len and llele pointers should be passed and cannot
+ * be NULL. If str is set to NULL, the value is an integer stored in llele. */
+int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele) {
if (setobj->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictGetFairRandomKey(setobj->ptr);
- *sdsele = dictGetKey(de);
+ *str = dictGetKey(de);
+ *len = sdslen(*str);
*llele = -123456789; /* Not needed. Defensive. */
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {
*llele = intsetRandom(setobj->ptr);
- *sdsele = NULL; /* Not needed. Defensive. */
+ *str = NULL; /* Not needed. Defensive. */
+ } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = setobj->ptr;
+ int r = rand() % lpLength(lp);
+ unsigned char *p = lpSeek(lp, r);
+ unsigned int l;
+ *str = (char *)lpGetValue(p, &l, (long long *)llele);
+ *len = (size_t)l;
} else {
serverPanic("Unknown set encoding");
}
return setobj->encoding;
}
+/* Pops a random element and returns it as an object. */
+robj *setTypePopRandom(robj *set) {
+ robj *obj;
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ /* Find random and delete it without re-seeking the listpack. */
+ unsigned int i = 0;
+ unsigned char *p = lpNextRandom(set->ptr, lpFirst(set->ptr), &i, 1, 0);
+ unsigned int len = 0; /* initialize to silence warning */
+ long long llele = 0; /* initialize to silence warning */
+ char *str = (char *)lpGetValue(p, &len, &llele);
+ if (str)
+ obj = createStringObject(str, len);
+ else
+ obj = createStringObjectFromLongLong(llele);
+ set->ptr = lpDelete(set->ptr, p, NULL);
+ } else {
+ char *str;
+ size_t len = 0;
+ int64_t llele = 0;
+ int encoding = setTypeRandomElement(set, &str, &len, &llele);
+ if (str)
+ obj = createStringObject(str, len);
+ else
+ obj = createStringObjectFromLongLong(llele);
+ setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
+ }
+ return obj;
+}
+
unsigned long setTypeSize(const robj *subject) {
if (subject->encoding == OBJ_ENCODING_HT) {
return dictSize((const dict*)subject->ptr);
} else if (subject->encoding == OBJ_ENCODING_INTSET) {
return intsetLen((const intset*)subject->ptr);
+ } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
+ return lpLength((unsigned char *)subject->ptr);
} else {
serverPanic("Unknown set encoding");
}
@@ -238,27 +470,44 @@ unsigned long setTypeSize(const robj *subject) {
void setTypeConvert(robj *setobj, int enc) {
setTypeIterator *si;
serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
- setobj->encoding == OBJ_ENCODING_INTSET);
+ setobj->encoding != enc);
if (enc == OBJ_ENCODING_HT) {
- int64_t intele;
dict *d = dictCreate(&setDictType);
sds element;
/* Presize the dict to avoid rehashing */
- dictExpand(d,intsetLen(setobj->ptr));
+ dictExpand(d, setTypeSize(setobj));
/* To add the elements we extract integers and create redis objects */
si = setTypeInitIterator(setobj);
- while (setTypeNext(si,&element,&intele) != -1) {
- element = sdsfromlonglong(intele);
+ while ((element = setTypeNextObject(si)) != NULL) {
serverAssert(dictAdd(d,element,NULL) == DICT_OK);
}
setTypeReleaseIterator(si);
+ freeSetObject(setobj); /* frees the internals but not setobj itself */
setobj->encoding = OBJ_ENCODING_HT;
- zfree(setobj->ptr);
setobj->ptr = d;
+ } else if (enc == OBJ_ENCODING_LISTPACK) {
+ /* Preallocate the minimum one byte per element */
+ size_t estcap = setTypeSize(setobj);
+ unsigned char *lp = lpNew(estcap);
+ char *str;
+ size_t len;
+ int64_t llele;
+ si = setTypeInitIterator(setobj);
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str != NULL)
+ lp = lpAppend(lp, (unsigned char *)str, len);
+ else
+ lp = lpAppendInteger(lp, llele);
+ }
+ setTypeReleaseIterator(si);
+
+ freeSetObject(setobj); /* frees the internals but not setobj itself */
+ setobj->encoding = OBJ_ENCODING_LISTPACK;
+ setobj->ptr = lp;
} else {
serverPanic("Unsupported set conversion");
}
@@ -272,8 +521,6 @@ void setTypeConvert(robj *setobj, int enc) {
robj *setTypeDup(robj *o) {
robj *set;
setTypeIterator *si;
- sds elesds;
- int64_t intobj;
serverAssert(o->type == OBJ_SET);
@@ -285,13 +532,23 @@ robj *setTypeDup(robj *o) {
memcpy(newis,is,size);
set = createObject(OBJ_SET, newis);
set->encoding = OBJ_ENCODING_INTSET;
+ } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = o->ptr;
+ size_t sz = lpBytes(lp);
+ unsigned char *new_lp = zmalloc(sz);
+ memcpy(new_lp, lp, sz);
+ set = createObject(OBJ_SET, new_lp);
+ set->encoding = OBJ_ENCODING_LISTPACK;
} else if (o->encoding == OBJ_ENCODING_HT) {
set = createSetObject();
dict *d = o->ptr;
dictExpand(set->ptr, dictSize(d));
si = setTypeInitIterator(o);
- while (setTypeNext(si, &elesds, &intobj) != -1) {
- setTypeAdd(set, elesds);
+ char *str;
+ size_t len;
+ int64_t intobj;
+ while (setTypeNext(si, &str, &len, &intobj) != -1) {
+ setTypeAdd(set, (sds)str);
}
setTypeReleaseIterator(si);
} else {
@@ -509,9 +766,9 @@ void spopWithCountCommand(client *c) {
addReplySetLen(c,count);
/* Common iteration vars. */
- sds sdsele;
robj *objele;
- int encoding;
+ char *str;
+ size_t len;
int64_t llele;
unsigned long remaining = size-count; /* Elements left after SPOP. */
@@ -522,24 +779,49 @@ void spopWithCountCommand(client *c) {
* CASE 2: The number of elements to return is small compared to the
* set size. We can just extract random elements and return them to
* the set. */
- if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
- while(count--) {
- /* Emit and remove. */
- encoding = setTypeRandomElement(set,&sdsele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
- addReplyBulkLongLong(c,llele);
- objele = createStringObjectFromLongLong(llele);
- set->ptr = intsetRemove(set->ptr,llele,NULL);
+ if (remaining*SPOP_MOVE_STRATEGY_MUL > count &&
+ set->encoding == OBJ_ENCODING_LISTPACK)
+ {
+ /* Specialized case for listpack. Traverse it only once. */
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int index = 0;
+ unsigned char **ps = zmalloc(sizeof(char *) * count);
+ for (unsigned long i = 0; i < count; i++) {
+ p = lpNextRandom(lp, p, &index, count - i, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+
+ if (str) {
+ addReplyBulkCBuffer(c, str, len);
+ objele = createStringObject(str, len);
} else {
- addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
- objele = createStringObject(sdsele,sdslen(sdsele));
- setTypeRemove(set,sdsele);
+ addReplyBulkLongLong(c, llele);
+ objele = createStringObjectFromLongLong(llele);
}
/* Replicate/AOF this command as an SREM operation */
propargv[2] = objele;
alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL);
decrRefCount(objele);
+
+ /* Store pointer for later deletion and move to next. */
+ ps[i] = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ lp = lpBatchDelete(lp, ps, count);
+ zfree(ps);
+ set->ptr = lp;
+ } else if (remaining*SPOP_MOVE_STRATEGY_MUL > count) {
+ while(count--) {
+ objele = setTypePopRandom(set);
+ addReplyBulk(c, objele);
+
+ /* Replicate/AOF this command as an SREM operation */
+ propargv[2] = objele;
+ alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL);
+ decrRefCount(objele);
}
} else {
/* CASE 3: The number of elements to return is very big, approaching
@@ -553,29 +835,46 @@ void spopWithCountCommand(client *c) {
robj *newset = NULL;
/* Create a new set with just the remaining elements. */
- while(remaining--) {
- encoding = setTypeRandomElement(set,&sdsele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
- sdsele = sdsfromlonglong(llele);
- } else {
- sdsele = sdsdup(sdsele);
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ /* Specialized case for listpack. Traverse it only once. */
+ newset = createSetListpackObject();
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int index = 0;
+ unsigned char **ps = zmalloc(sizeof(char *) * remaining);
+ for (unsigned long i = 0; i < remaining; i++) {
+ p = lpNextRandom(lp, p, &index, remaining - i, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+ setTypeAddAux(newset, str, len, llele, 0);
+ ps[i] = p;
+ p = lpNext(lp, p);
+ index++;
+ }
+ lp = lpBatchDelete(lp, ps, remaining);
+ zfree(ps);
+ set->ptr = lp;
+ } else {
+ while(remaining--) {
+ int encoding = setTypeRandomElement(set, &str, &len, &llele);
+ if (!newset) {
+ newset = str ? createSetListpackObject() : createIntsetObject();
+ }
+ setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HT);
+ setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
}
- if (!newset) newset = setTypeCreate(sdsele);
- setTypeAdd(newset,sdsele);
- setTypeRemove(set,sdsele);
- sdsfree(sdsele);
}
/* Transfer the old set to the client. */
setTypeIterator *si;
si = setTypeInitIterator(set);
- while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) {
- if (encoding == OBJ_ENCODING_INTSET) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
objele = createStringObjectFromLongLong(llele);
} else {
- addReplyBulkCBuffer(c,sdsele,sdslen(sdsele));
- objele = createStringObject(sdsele,sdslen(sdsele));
+ addReplyBulkCBuffer(c, str, len);
+ objele = createStringObject(str, len);
}
/* Replicate/AOF this command as an SREM operation */
@@ -599,9 +898,6 @@ void spopWithCountCommand(client *c) {
void spopCommand(client *c) {
robj *set, *ele;
- sds sdsele;
- int64_t llele;
- int encoding;
if (c->argc == 3) {
spopWithCountCommand(c);
@@ -616,17 +912,8 @@ void spopCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
- /* Get a random element from the set */
- encoding = setTypeRandomElement(set,&sdsele,&llele);
-
- /* Remove the element from the set */
- if (encoding == OBJ_ENCODING_INTSET) {
- ele = createStringObjectFromLongLong(llele);
- set->ptr = intsetRemove(set->ptr,llele,NULL);
- } else {
- ele = createStringObject(sdsele,sdslen(sdsele));
- setTypeRemove(set,ele->ptr);
- }
+ /* Pop a random element from the set */
+ ele = setTypePopRandom(set);
notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
@@ -634,7 +921,7 @@ void spopCommand(client *c) {
rewriteClientCommandVector(c,3,shared.srem,c->argv[1],ele);
/* Add the element to the reply */
- addReplyBulk(c,ele);
+ addReplyBulk(c, ele);
decrRefCount(ele);
/* Delete the set if it's empty */
@@ -661,9 +948,9 @@ void srandmemberWithCountCommand(client *c) {
unsigned long count, size;
int uniq = 1;
robj *set;
- sds ele;
+ char *str;
+ size_t len;
int64_t llele;
- int encoding;
dict *d;
@@ -694,12 +981,27 @@ void srandmemberWithCountCommand(client *c) {
* elements in random order. */
if (!uniq || count == 1) {
addReplyArrayLen(c,count);
+
+ if (set->encoding == OBJ_ENCODING_LISTPACK && count > 1) {
+ /* Specialized case for listpack, traversing it only once. */
+ listpackEntry *entries = zmalloc(count * sizeof(listpackEntry));
+ lpRandomEntries(set->ptr, count, entries);
+ for (unsigned long i = 0; i < count; i++) {
+ if (entries[i].sval)
+ addReplyBulkCBuffer(c, entries[i].sval, entries[i].slen);
+ else
+ addReplyBulkLongLong(c, entries[i].lval);
+ }
+ zfree(entries);
+ return;
+ }
+
while(count--) {
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
}
return;
@@ -712,11 +1014,11 @@ void srandmemberWithCountCommand(client *c) {
setTypeIterator *si;
addReplyArrayLen(c,size);
si = setTypeInitIterator(set);
- while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
- if (encoding == OBJ_ENCODING_INTSET) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
size--;
}
@@ -725,6 +1027,31 @@ void srandmemberWithCountCommand(client *c) {
return;
}
+ /* CASE 2.5 listpack only. Sampling unique elements, in non-random order.
+ * Listpack encoded sets are meant to be relatively small, so
+ * SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make
+ * copies of the entries. Instead, we emit them directly to the output
+ * buffer. */
+ if (set->encoding == OBJ_ENCODING_LISTPACK) {
+ unsigned char *lp = set->ptr;
+ unsigned char *p = lpFirst(lp);
+ unsigned int i = 0;
+ addReplyArrayLen(c, count);
+ while (count) {
+ p = lpNextRandom(lp, p, &i, count--, 0);
+ unsigned int len;
+ str = (char *)lpGetValue(p, &len, (long long *)&llele);
+ if (str == NULL) {
+ addReplyBulkLongLong(c, llele);
+ } else {
+ addReplyBulkCBuffer(c, str, len);
+ }
+ p = lpNext(lp, p);
+ i++;
+ }
+ return;
+ }
+
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
d = dictCreate(&sdsReplyDictType);
@@ -743,13 +1070,13 @@ void srandmemberWithCountCommand(client *c) {
/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
dictExpand(d, size);
- while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
+ while (setTypeNext(si, &str, &len, &llele) != -1) {
int retval = DICT_ERR;
- if (encoding == OBJ_ENCODING_INTSET) {
+ if (str == NULL) {
retval = dictAdd(d,sdsfromlonglong(llele),NULL);
} else {
- retval = dictAdd(d,sdsdup(ele),NULL);
+ retval = dictAdd(d, sdsnewlen(str, len), NULL);
}
serverAssert(retval == DICT_OK);
}
@@ -777,11 +1104,11 @@ void srandmemberWithCountCommand(client *c) {
dictExpand(d, count);
while (added < count) {
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
sdsele = sdsfromlonglong(llele);
} else {
- sdsele = sdsdup(ele);
+ sdsele = sdsnewlen(str, len);
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
@@ -810,9 +1137,9 @@ void srandmemberWithCountCommand(client *c) {
/* SRANDMEMBER <key> [<count>] */
void srandmemberCommand(client *c) {
robj *set;
- sds ele;
+ char *str;
+ size_t len;
int64_t llele;
- int encoding;
if (c->argc == 3) {
srandmemberWithCountCommand(c);
@@ -826,11 +1153,11 @@ void srandmemberCommand(client *c) {
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
- encoding = setTypeRandomElement(set,&ele,&llele);
- if (encoding == OBJ_ENCODING_INTSET) {
+ setTypeRandomElement(set, &str, &len, &llele);
+ if (str == NULL) {
addReplyBulkLongLong(c,llele);
} else {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
+ addReplyBulkCBuffer(c, str, len);
}
}
@@ -866,7 +1193,8 @@ void sinterGenericCommand(client *c, robj **setkeys,
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *dstset = NULL;
- sds elesds;
+ char *str;
+ size_t len;
int64_t intobj;
void *replylen = NULL;
unsigned long j, cardinality = 0;
@@ -918,7 +1246,24 @@ void sinterGenericCommand(client *c, robj **setkeys,
if (dstkey) {
/* If we have a target key where to store the resulting set
* create this key with an empty set inside */
- dstset = createIntsetObject();
+ if (sets[0]->encoding == OBJ_ENCODING_INTSET) {
+ /* The first set is an intset, so the result is an intset too. The
+ * elements are inserted in ascending order which is efficient in an
+ * intset. */
+ dstset = createIntsetObject();
+ } else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) {
+ /* To avoid many reallocs, we estimate that the result is a listpack
+ * of approximately the same size as the first set. Then we shrink
+ * it or possibly convert it to intset it in the end. */
+ unsigned char *lp = lpNew(lpBytes(sets[0]->ptr));
+ dstset = createObject(OBJ_SET, lp);
+ dstset->encoding = OBJ_ENCODING_LISTPACK;
+ } else {
+ /* We start off with a listpack, since it's more efficient to append
+ * to than an intset. Later we can convert it to intset or a
+ * hashtable. */
+ dstset = createSetListpackObject();
+ }
} else if (!cardinality_only) {
replylen = addReplyDeferredLen(c);
}
@@ -926,32 +1271,14 @@ void sinterGenericCommand(client *c, robj **setkeys,
/* Iterate all the elements of the first (smallest) set, and test
* the element against all the other sets, if at least one set does
* not include the element it is discarded */
+ int only_integers = 1;
si = setTypeInitIterator(sets[0]);
- while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
+ while((encoding = setTypeNext(si, &str, &len, &intobj)) != -1) {
for (j = 1; j < setnum; j++) {
if (sets[j] == sets[0]) continue;
- if (encoding == OBJ_ENCODING_INTSET) {
- /* intset with intset is simple... and fast */
- if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
- !intsetFind((intset*)sets[j]->ptr,intobj))
- {
- break;
- /* in order to compare an integer with an object we
- * have to use the generic function, creating an object
- * for this */
- } else if (sets[j]->encoding == OBJ_ENCODING_HT) {
- elesds = sdsfromlonglong(intobj);
- if (!setTypeIsMember(sets[j],elesds)) {
- sdsfree(elesds);
- break;
- }
- sdsfree(elesds);
- }
- } else if (encoding == OBJ_ENCODING_HT) {
- if (!setTypeIsMember(sets[j],elesds)) {
- break;
- }
- }
+ if (!setTypeIsMemberAux(sets[j], str, len, intobj,
+ encoding == OBJ_ENCODING_HT))
+ break;
}
/* Only take action when all sets contain the member */
@@ -963,19 +1290,29 @@ void sinterGenericCommand(client *c, robj **setkeys,
if (limit && cardinality >= limit)
break;
} else if (!dstkey) {
- if (encoding == OBJ_ENCODING_HT)
- addReplyBulkCBuffer(c,elesds,sdslen(elesds));
+ if (str != NULL)
+ addReplyBulkCBuffer(c, str, len);
else
addReplyBulkLongLong(c,intobj);
cardinality++;
} else {
- if (encoding == OBJ_ENCODING_INTSET) {
- elesds = sdsfromlonglong(intobj);
- setTypeAdd(dstset,elesds);
- sdsfree(elesds);
- } else {
- setTypeAdd(dstset,elesds);
+ if (str && only_integers) {
+ /* It may be an integer although we got it as a string. */
+ if (encoding == OBJ_ENCODING_HT &&
+ string2ll(str, len, (long long *)&intobj))
+ {
+ if (dstset->encoding == OBJ_ENCODING_LISTPACK ||
+ dstset->encoding == OBJ_ENCODING_INTSET)
+ {
+ /* Adding it as an integer is more efficient. */
+ str = NULL;
+ }
+ } else {
+ /* It's not an integer */
+ only_integers = 0;
+ }
}
+ setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HT);
}
}
}
@@ -987,6 +1324,12 @@ void sinterGenericCommand(client *c, robj **setkeys,
/* Store the resulting set into the target, if the intersection
* is not an empty set. */
if (setTypeSize(dstset) > 0) {
+ if (only_integers) maybeConvertToIntset(dstset);
+ if (dstset->encoding == OBJ_ENCODING_LISTPACK) {
+ /* We allocated too much memory when we created it to avoid
+ * frequent reallocs. Therefore, we shrink it now. */
+ dstset->ptr = lpShrinkToFit(dstset->ptr);
+ }
setKey(c,c->db,dstkey,dstset,0);
addReplyLongLong(c,setTypeSize(dstset));
notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
@@ -1054,7 +1397,10 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si;
robj *dstset = NULL;
- sds ele;
+ char *str;
+ size_t len;
+ int64_t llval;
+ int encoding;
int j, cardinality = 0;
int diff_algo = 1;
int sameset = 0;
@@ -1120,9 +1466,8 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
- while((ele = setTypeNextObject(si)) != NULL) {
- if (setTypeAdd(dstset,ele)) cardinality++;
- sdsfree(ele);
+ while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
+ cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
}
setTypeReleaseIterator(si);
}
@@ -1138,18 +1483,19 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
* This way we perform at max N*M operations, where N is the size of
* the first set, and M the number of sets. */
si = setTypeInitIterator(sets[0]);
- while((ele = setTypeNextObject(si)) != NULL) {
+ while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
for (j = 1; j < setnum; j++) {
if (!sets[j]) continue; /* no key is an empty set. */
if (sets[j] == sets[0]) break; /* same set! */
- if (setTypeIsMember(sets[j],ele)) break;
+ if (setTypeIsMemberAux(sets[j], str, len, llval,
+ encoding == OBJ_ENCODING_HT))
+ break;
}
if (j == setnum) {
/* There is no other set with this element. Add it. */
- setTypeAdd(dstset,ele);
+ setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
cardinality++;
}
- sdsfree(ele);
}
setTypeReleaseIterator(si);
} else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
@@ -1164,13 +1510,14 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!sets[j]) continue; /* non existing keys are like empty sets */
si = setTypeInitIterator(sets[j]);
- while((ele = setTypeNextObject(si)) != NULL) {
+ while((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
if (j == 0) {
- if (setTypeAdd(dstset,ele)) cardinality++;
+ cardinality += setTypeAddAux(dstset, str, len, llval,
+ encoding == OBJ_ENCODING_HT);
} else {
- if (setTypeRemove(dstset,ele)) cardinality--;
+ cardinality -= setTypeRemoveAux(dstset, str, len, llval,
+ encoding == OBJ_ENCODING_HT);
}
- sdsfree(ele);
}
setTypeReleaseIterator(si);
@@ -1184,9 +1531,11 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
if (!dstkey) {
addReplySetLen(c,cardinality);
si = setTypeInitIterator(dstset);
- while((ele = setTypeNextObject(si)) != NULL) {
- addReplyBulkCBuffer(c,ele,sdslen(ele));
- sdsfree(ele);
+ while (setTypeNext(si, &str, &len, &llval) != -1) {
+ if (str)
+ addReplyBulkCBuffer(c, str, len);
+ else
+ addReplyBulkLongLong(c, llval);
}
setTypeReleaseIterator(si);
server.lazyfree_lazy_server_del ? freeObjAsync(NULL, dstset, -1) :
diff --git a/src/t_zset.c b/src/t_zset.c
index e194860db..ed1830175 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1971,6 +1971,10 @@ typedef struct {
dictIterator *di;
dictEntry *de;
} ht;
+ struct {
+ unsigned char *lp;
+ unsigned char *p;
+ } lp;
} set;
/* Sorted set iterators. */
@@ -2025,6 +2029,9 @@ void zuiInitIterator(zsetopsrc *op) {
it->ht.dict = op->subject->ptr;
it->ht.di = dictGetIterator(op->subject->ptr);
it->ht.de = dictNext(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ it->lp.lp = op->subject->ptr;
+ it->lp.p = lpFirst(it->lp.lp);
} else {
serverPanic("Unknown set encoding");
}
@@ -2061,6 +2068,8 @@ void zuiClearIterator(zsetopsrc *op) {
UNUSED(it); /* skip */
} else if (op->encoding == OBJ_ENCODING_HT) {
dictReleaseIterator(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ UNUSED(it);
} else {
serverPanic("Unknown set encoding");
}
@@ -2091,14 +2100,7 @@ unsigned long zuiLength(zsetopsrc *op) {
return 0;
if (op->type == OBJ_SET) {
- if (op->encoding == OBJ_ENCODING_INTSET) {
- return intsetLen(op->subject->ptr);
- } else if (op->encoding == OBJ_ENCODING_HT) {
- dict *ht = op->subject->ptr;
- return dictSize(ht);
- } else {
- serverPanic("Unknown set encoding");
- }
+ return setTypeSize(op->subject);
} else if (op->type == OBJ_ZSET) {
if (op->encoding == OBJ_ENCODING_LISTPACK) {
return zzlLength(op->subject->ptr);
@@ -2144,6 +2146,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {
/* Move to next element. */
it->ht.de = dictNext(it->ht.di);
+ } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
+ if (it->lp.p == NULL)
+ return 0;
+ val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell);
+ val->score = 1.0;
+
+ /* Move to next element. */
+ it->lp.p = lpNext(it->lp.lp, it->lp.p);
} else {
serverPanic("Unknown set encoding");
}
diff --git a/tests/unit/keyspace.tcl b/tests/unit/keyspace.tcl
index f5f971140..97ffef6bd 100644
--- a/tests/unit/keyspace.tcl
+++ b/tests/unit/keyspace.tcl
@@ -256,30 +256,27 @@ start_server {tags {"keyspace"}} {
assert_equal $digest [debug_digest_value mynewlist{t}]
}
- test {COPY basic usage for intset set} {
- r del set1{t} newset1{t}
- r sadd set1{t} 1 2 3
- assert_encoding intset set1{t}
- r copy set1{t} newset1{t}
- set digest [debug_digest_value set1{t}]
- assert_equal $digest [debug_digest_value newset1{t}]
- assert_refcount 1 set1{t}
- assert_refcount 1 newset1{t}
- r del set1{t}
- assert_equal $digest [debug_digest_value newset1{t}]
- }
-
- test {COPY basic usage for hashtable set} {
- r del set2{t} newset2{t}
- r sadd set2{t} 1 2 3 a
- assert_encoding hashtable set2{t}
- r copy set2{t} newset2{t}
- set digest [debug_digest_value set2{t}]
- assert_equal $digest [debug_digest_value newset2{t}]
- assert_refcount 1 set2{t}
- assert_refcount 1 newset2{t}
- r del set2{t}
- assert_equal $digest [debug_digest_value newset2{t}]
+ foreach type {intset listpack hashtable} {
+ test {COPY basic usage for $type set} {
+ r del set1{t} newset1{t}
+ r sadd set1{t} 1 2 3
+ if {$type ne "intset"} {
+ r sadd set1{t} a
+ }
+ if {$type eq "hashtable"} {
+ for {set i 4} {$i < 200} {incr i} {
+ r sadd set1{t} $i
+ }
+ }
+ assert_encoding $type set1{t}
+ r copy set1{t} newset1{t}
+ set digest [debug_digest_value set1{t}]
+ assert_equal $digest [debug_digest_value newset1{t}]
+ assert_refcount 1 set1{t}
+ assert_refcount 1 newset1{t}
+ r del set1{t}
+ assert_equal $digest [debug_digest_value newset1{t}]
+ }
}
test {COPY basic usage for listpack sorted set} {
diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl
index 5dea76be7..45397d7a3 100644
--- a/tests/unit/scan.tcl
+++ b/tests/unit/scan.tcl
@@ -98,7 +98,7 @@ start_server {tags {"scan network"}} {
assert_equal 1000 [llength $keys]
}
- foreach enc {intset hashtable} {
+ foreach enc {intset listpack hashtable} {
test "SSCAN with encoding $enc" {
# Create the Set
r del set
@@ -107,8 +107,9 @@ start_server {tags {"scan network"}} {
} else {
set prefix "ele:"
}
+ set count [expr {$enc eq "hashtable" ? 200 : 100}]
set elements {}
- for {set j 0} {$j < 100} {incr j} {
+ for {set j 0} {$j < $count} {incr j} {
lappend elements ${prefix}${j}
}
r sadd set {*}$elements
@@ -128,7 +129,7 @@ start_server {tags {"scan network"}} {
}
set keys [lsort -unique $keys]
- assert_equal 100 [llength $keys]
+ assert_equal $count [llength $keys]
}
}
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 30b6dc5d7..34c2ea76d 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -2,6 +2,8 @@ start_server {
tags {"set"}
overrides {
"set-max-intset-entries" 512
+ "set-max-listpack-entries" 128
+ "set-max-listpack-value" 32
}
} {
proc create_set {key entries} {
@@ -9,12 +11,19 @@ start_server {
foreach entry $entries { r sadd $key $entry }
}
- test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - regular set} {
- create_set myset {foo}
- assert_encoding hashtable myset
+ # Values for initialing sets, per encoding.
+ array set initelems {listpack {foo} hashtable {foo}}
+ for {set i 0} {$i < 130} {incr i} {
+ lappend initelems(hashtable) [format "i%03d" $i]
+ }
+
+ foreach type {listpack hashtable} {
+ test "SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - $type" {
+ create_set myset $initelems($type)
+ assert_encoding $type myset
assert_equal 1 [r sadd myset bar]
assert_equal 0 [r sadd myset bar]
- assert_equal 2 [r scard myset]
+ assert_equal [expr [llength $initelems($type)] + 1] [r scard myset]
assert_equal 1 [r sismember myset foo]
assert_equal 1 [r sismember myset bar]
assert_equal 0 [r sismember myset bla]
@@ -23,7 +32,8 @@ start_server {
assert_equal {1 0} [r smismember myset foo bla]
assert_equal {0 1} [r smismember myset bla foo]
assert_equal {0} [r smismember myset bla]
- assert_equal {bar foo} [lsort [r smembers myset]]
+ assert_equal "bar $initelems($type)" [lsort [r smembers myset]]
+ }
}
test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - intset} {
@@ -67,15 +77,33 @@ start_server {
assert_error WRONGTYPE* {r sadd mylist bar}
}
- test "SADD a non-integer against an intset" {
+ test "SADD a non-integer against a small intset" {
create_set myset {1 2 3}
assert_encoding intset myset
assert_equal 1 [r sadd myset a]
+ assert_encoding listpack myset
+ }
+
+ test "SADD a non-integer against a large intset" {
+ create_set myset {0}
+ for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+ assert_encoding intset myset
+ assert_equal 1 [r sadd myset a]
assert_encoding hashtable myset
}
test "SADD an integer larger than 64 bits" {
create_set myset {213244124402402314402033402}
+ assert_encoding listpack myset
+ assert_equal 1 [r sismember myset 213244124402402314402033402]
+ assert_equal {1} [r smismember myset 213244124402402314402033402]
+ }
+
+ test "SADD an integer larger than 64 bits to a large intset" {
+ create_set myset {0}
+ for {set i 1} {$i < 130} {incr i} {r sadd myset $i}
+ assert_encoding intset myset
+ r sadd myset 213244124402402314402033402
assert_encoding hashtable myset
assert_equal 1 [r sismember myset 213244124402402314402033402]
assert_equal {1} [r smismember myset 213244124402402314402033402]
@@ -100,25 +128,32 @@ start_server {
r del myintset
r del myhashset
r del mylargeintset
+ r del mysmallset
for {set i 0} {$i < 100} {incr i} { r sadd myintset $i }
for {set i 0} {$i < 1280} {incr i} { r sadd mylargeintset $i }
+ for {set i 0} {$i < 50} {incr i} { r sadd mysmallset [format "i%03d" $i] }
for {set i 0} {$i < 256} {incr i} { r sadd myhashset [format "i%03d" $i] }
assert_encoding intset myintset
assert_encoding hashtable mylargeintset
+ assert_encoding listpack mysmallset
assert_encoding hashtable myhashset
r debug reload
assert_encoding intset myintset
assert_encoding hashtable mylargeintset
+ assert_encoding listpack mysmallset
assert_encoding hashtable myhashset
} {} {needs:debug}
- test {SREM basics - regular set} {
- create_set myset {foo bar ciao}
- assert_encoding hashtable myset
- assert_equal 0 [r srem myset qux]
- assert_equal 1 [r srem myset foo]
- assert_equal {bar ciao} [lsort [r smembers myset]]
+ foreach type {listpack hashtable} {
+ test {SREM basics - $type} {
+ create_set myset $initelems($type)
+ r sadd myset ciao
+ assert_encoding $type myset
+ assert_equal 0 [r srem myset qux]
+ assert_equal 1 [r srem myset ciao]
+ assert_equal $initelems($type) [lsort [r smembers myset]]
+ }
}
test {SREM basics - intset} {
@@ -177,7 +212,18 @@ start_server {
assert_equal 0 [r sintercard 1 non-existing-key limit 10]
}
- foreach {type} {hashtable intset} {
+ foreach {type} {regular intset} {
+ # Create sets setN{t} where N = 1..5
+ if {$type eq "regular"} {
+ set smallenc listpack
+ set bigenc hashtable
+ } else {
+ set smallenc intset
+ set bigenc intset
+ }
+ # Sets 1, 2 and 4 are big; sets 3 and 5 are small.
+ array set encoding "1 $bigenc 2 $bigenc 3 $smallenc 4 $bigenc 5 $smallenc"
+
for {set i 1} {$i <= 5} {incr i} {
r del [format "set%d{t}" $i]
}
@@ -198,7 +244,7 @@ start_server {
# while the tests are running -- an extra element is added to every
# set that determines its encoding.
set large 200
- if {$type eq "hashtable"} {
+ if {$type eq "regular"} {
set large foo
}
@@ -206,9 +252,9 @@ start_server {
r sadd [format "set%d{t}" $i] $large
}
- test "Generated sets must be encoded as $type" {
+ test "Generated sets must be encoded correctly - $type" {
for {set i 1} {$i <= 5} {incr i} {
- assert_encoding $type [format "set%d{t}" $i]
+ assert_encoding $encoding($i) [format "set%d{t}" $i]
}
}
@@ -225,14 +271,14 @@ start_server {
test "SINTERSTORE with two sets - $type" {
r sinterstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $smallenc setres{t}
assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
}
test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" {
r debug reload
r sinterstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $smallenc setres{t}
assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]]
} {} {needs:debug}
@@ -243,7 +289,7 @@ start_server {
test "SUNIONSTORE with two sets - $type" {
r sunionstore setres{t} set1{t} set2{t}
- assert_encoding $type setres{t}
+ assert_encoding $bigenc setres{t}
set expected [lsort -uniq "[r smembers set1{t}] [r smembers set2{t}]"]
assert_equal $expected [lsort [r smembers setres{t}]]
}
@@ -294,6 +340,46 @@ start_server {
}
}
+ test "SINTERSTORE with two listpack sets where result is intset" {
+ r del setres{t} set1{t} set2{t}
+ r sadd set1{t} a b c 1 3 6 x y z
+ r sadd set2{t} e f g 1 2 3 u v w
+ assert_encoding listpack set1{t}
+ assert_encoding listpack set2{t}
+ r sinterstore setres{t} set1{t} set2{t}
+ assert_equal [list 1 3] [lsort [r smembers setres{t}]]
+ assert_encoding intset setres{t}
+ }
+
+ test "SINTERSTORE with two hashtable sets where result is intset" {
+ r del setres{t} set1{t} set2{t}
+ r sadd set1{t} a b c 444 555 666
+ r sadd set2{t} e f g 111 222 333
+ set expected {}
+ for {set i 1} {$i < 130} {incr i} {
+ r sadd set1{t} $i
+ r sadd set2{t} $i
+ lappend expected $i
+ }
+ assert_encoding hashtable set1{t}
+ assert_encoding hashtable set2{t}
+ r sinterstore setres{t} set1{t} set2{t}
+ assert_equal [lsort $expected] [lsort [r smembers setres{t}]]
+ assert_encoding intset setres{t}
+ }
+
+ test "SUNION hashtable and listpack" {
+ # This adds code coverage for adding a non-sds string to a hashtable set
+ # which already contains the string.
+ r del set1{t} set2{t}
+ set union {abcdefghijklmnopqrstuvwxyz1234567890 a b c 1 2 3}
+ create_set set1{t} $union
+ create_set set2{t} {a b c}
+ assert_encoding hashtable set1{t}
+ assert_encoding listpack set2{t}
+ assert_equal [lsort $union] [lsort [r sunion set1{t} set2{t}]]
+ }
+
test "SDIFF with first set empty" {
r del set1{t} set2{t} set3{t}
r sadd set2{t} 1 2 3 4
@@ -428,7 +514,7 @@ start_server {
r sadd set2{t} 1 2 3 a
r srem set2{t} a
assert_encoding intset set1{t}
- assert_encoding hashtable set2{t}
+ assert_encoding listpack set2{t}
lsort [r sinter set1{t} set2{t}]
} {1 2 3}
@@ -549,7 +635,7 @@ start_server {
assert_equal 0 [r exists setres{t}]
}
- foreach {type contents} {hashtable {a b c} intset {1 2 3}} {
+ foreach {type contents} {listpack {a b c} intset {1 2 3}} {
test "SPOP basics - $type" {
create_set myset $contents
assert_encoding $type myset
@@ -575,11 +661,20 @@ start_server {
}
}
+ test "SPOP integer from listpack set" {
+ create_set myset {a 1 2 3 4 5 6 7}
+ assert_encoding listpack myset
+ set a [r spop myset]
+ set b [r spop myset]
+ assert {[string is digit $a] || [string is digit $b]}
+ }
+
foreach {type contents} {
- hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z}
+ listpack {a b c d e f g h i j k l m n o p q r s t u v w x y z}
intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9}
+ hashtable {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 b c d e f g h i j k l m n o p q r s t u v w x y z}
} {
- test "SPOP with <count>" {
+ test "SPOP with <count> - $type" {
create_set myset $contents
assert_encoding $type myset
assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]]
@@ -610,16 +705,20 @@ start_server {
r spop nonexisting_key 100
} {}
- test "SPOP new implementation: code path #1" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ foreach {type content} {
+ intset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ listpack {a 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ } {
+ test "SPOP new implementation: code path #1 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 30]
assert {[lsort $content] eq [lsort $res]}
}
- test "SPOP new implementation: code path #2" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ test "SPOP new implementation: code path #2 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 2]
assert {[llength $res] == 2}
assert {[r scard myset] == 18}
@@ -627,15 +726,16 @@ start_server {
assert {[lsort $union] eq [lsort $content]}
}
- test "SPOP new implementation: code path #3" {
- set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20}
+ test "SPOP new implementation: code path #3 $type" {
create_set myset $content
+ assert_encoding $type myset
set res [r spop myset 18]
assert {[llength $res] == 18}
assert {[r scard myset] == 2}
set union [concat [r smembers myset] $res]
assert {[lsort $union] eq [lsort $content]}
}
+ }
test "SRANDMEMBER count of 0 is handled correctly" {
r srandmember myset 0
@@ -659,7 +759,7 @@ start_server {
r readraw 0
foreach {type contents} {
- hashtable {
+ listpack {
1 5 10 50 125 50000 33959417 4775547 65434162
12098459 427716 483706 2726473884 72615637475
MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
@@ -674,9 +774,20 @@ start_server {
30 31 32 33 34 35 36 37 38 39
40 41 42 43 44 45 46 47 48 49
}
+ hashtable {
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+ 1 5 10 50 125 50000 33959417 4775547 65434162
+ 12098459 427716 483706 2726473884 72615637475
+ MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA
+ SUSAN MARGARET DOROTHY LISA NANCY KAREN BETTY HELEN
+ SANDRA DONNA CAROL RUTH SHARON MICHELLE LAURA SARAH
+ KIMBERLY DEBORAH JESSICA SHIRLEY CYNTHIA ANGELA MELISSA
+ BRENDA AMY ANNA REBECCA VIRGINIA
+ }
} {
test "SRANDMEMBER with <count> - $type" {
create_set myset $contents
+ assert_encoding $type myset
unset -nocomplain myset
array set myset {}
foreach ele [r smembers myset] {
@@ -767,16 +878,22 @@ start_server {
}
foreach {type contents} {
- hashtable {
+ listpack {
1 5 10 50 125
MARY PATRICIA LINDA BARBARA ELIZABETH
}
intset {
0 1 2 3 4 5 6 7 8 9
}
+ hashtable {
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+ 1 5 10 50 125
+ MARY PATRICIA LINDA BARBARA
+ }
} {
test "SRANDMEMBER histogram distribution - $type" {
create_set myset $contents
+ assert_encoding $type myset
unset -nocomplain myset
array set myset {}
foreach ele [r smembers myset] {
@@ -809,7 +926,7 @@ start_server {
r del myset3{t} myset4{t}
create_set myset1{t} {1 a b}
create_set myset2{t} {2 3 4}
- assert_encoding hashtable myset1{t}
+ assert_encoding listpack myset1{t}
assert_encoding intset myset2{t}
}
@@ -819,7 +936,7 @@ start_server {
assert_equal 1 [r smove myset1{t} myset2{t} a]
assert_equal {1 b} [lsort [r smembers myset1{t}]]
assert_equal {2 3 4 a} [lsort [r smembers myset2{t}]]
- assert_encoding hashtable myset2{t}
+ assert_encoding listpack myset2{t}
# move an integer element should not convert the encoding
setup_move
@@ -855,7 +972,7 @@ start_server {
assert_equal 1 [r smove myset1{t} myset3{t} a]
assert_equal {1 b} [lsort [r smembers myset1{t}]]
assert_equal {a} [lsort [r smembers myset3{t}]]
- assert_encoding hashtable myset3{t}
+ assert_encoding listpack myset3{t}
}
test "SMOVE from intset to non existing destination set" {