5 files changed, 63 insertions, 27 deletions
diff --git a/src/listpack.c b/src/listpack.c
index f36d9d64d..8b3209be2 100644
--- a/src/listpack.c
+++ b/src/listpack.c
@@ -1221,6 +1221,15 @@ size_t lpBytes(unsigned char *lp) {
     return lpGetTotalBytes(lp);
 }
 
+/* Returns the size of a listpack consisting of an integer repeated 'rep' times. */
+size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep) {
+    uint64_t enclen;
+    unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
+    lpEncodeIntegerGetType(lval, intenc, &enclen);
+    unsigned long backlen = lpEncodeBacklen(NULL, enclen);
+    return LP_HDR_SIZE + (enclen + backlen) * rep + 1;
+}
+
 /* Seek the specified element and returns the pointer to the seeked element.
  * Positive indexes specify the zero-based element to seek from the head to
  * the tail, negative indexes specify elements starting from the tail, where
diff --git a/src/listpack.h b/src/listpack.h
index 304ccd32c..a60f089f9 100644
--- a/src/listpack.h
+++ b/src/listpack.h
@@ -82,6 +82,7 @@ unsigned char *lpLast(unsigned char *lp);
 unsigned char *lpNext(unsigned char *lp, unsigned char *p);
 unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
 size_t lpBytes(unsigned char *lp);
+size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep);
 unsigned char *lpSeek(unsigned char *lp, long index);
 typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata);
 int lpValidateIntegrity(unsigned char *lp, size_t size, int deep,
diff --git a/src/rdb.c b/src/rdb.c
index 733181ac6..8a99d4aa2 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -1875,14 +1875,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                      * of many small ones. It's OK since lpSafeToAdd doesn't
                      * care about individual elements, only the total size. */
                     setTypeConvert(o, OBJ_ENCODING_LISTPACK);
-                } else {
-                    setTypeConvert(o,OBJ_ENCODING_HT);
-                    if (dictTryExpand(o->ptr,len) != DICT_OK) {
-                        rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
-                        sdsfree(sdsele);
-                        decrRefCount(o);
-                        return NULL;
-                    }
+                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+                    rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+                    sdsfree(sdsele);
+                    decrRefCount(o);
+                    return NULL;
                 }
             }
 
@@ -1901,15 +1898,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                         return NULL;
                     }
                     o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
-                } else {
-                    setTypeConvert(o, OBJ_ENCODING_HT);
-                    if (dictTryExpand(o->ptr, len) != DICT_OK) {
-                        rdbReportCorruptRDB("OOM in dictTryExpand %llu",
-                                            (unsigned long long)len);
-                        sdsfree(sdsele);
-                        decrRefCount(o);
-                        return NULL;
-                    }
+                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
+                    rdbReportCorruptRDB("OOM in dictTryExpand %llu",
+                                        (unsigned long long)len);
+                    sdsfree(sdsele);
+                    decrRefCount(o);
+                    return NULL;
                 }
             }
 
diff --git a/src/server.h b/src/server.h
index 105b2db4e..1ce5ff781 100644
--- a/src/server.h
+++ b/src/server.h
@@ -3022,6 +3022,7 @@ int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele);
 unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
 unsigned long setTypeSize(const robj *subject);
 void setTypeConvert(robj *subject, int enc);
+int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic);
 robj *setTypeDup(robj *o);
 
 /* Hash data type */
diff --git a/src/t_set.c b/src/t_set.c
index 197cabcd9..ae5f26540 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -159,7 +159,7 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
                 set->ptr = lp;
             } else {
                 /* Size limit is reached. Convert to hashtable and add. */
-                setTypeConvert(set, OBJ_ENCODING_HT);
+                setTypeConvertAndExpand(set, OBJ_ENCODING_HT, lpLength(lp) + 1, 1);
                 serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
             }
             return 1;
@@ -174,23 +174,34 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
                 return 1;
             }
         } else {
-            size_t maxelelen = intsetLen(set->ptr) == 0 ?
-                0 : max(sdigits10(intsetMax(set->ptr)),
-                        sdigits10(intsetMin(set->ptr)));
+            /* Check if listpack encoding is safe not to cross any threshold. */
+            size_t maxelelen = 0, totsize = 0;
+            unsigned long n = intsetLen(set->ptr);
+            if (n != 0) {
+                size_t elelen1 = sdigits10(intsetMax(set->ptr));
+                size_t elelen2 = sdigits10(intsetMin(set->ptr));
+                maxelelen = max(elelen1, elelen2);
+                size_t s1 = lpEstimateBytesRepeatedInteger(intsetMax(set->ptr), n);
+                size_t s2 = lpEstimateBytesRepeatedInteger(intsetMin(set->ptr), n);
+                totsize = max(s1, s2);
+            }
             if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries &&
                 len <= server.set_max_listpack_value &&
                 maxelelen <= server.set_max_listpack_value &&
-                lpSafeToAdd(NULL, maxelelen * intsetLen(set->ptr) + len))
+                lpSafeToAdd(NULL, totsize + len))
             {
                 /* In the "safe to add" check above we assumed all elements in
                  * the intset are of size maxelelen. This is an upper bound. */
-                setTypeConvert(set, OBJ_ENCODING_LISTPACK);
+                setTypeConvertAndExpand(set, OBJ_ENCODING_LISTPACK,
+                                        intsetLen(set->ptr) + 1, 1);
                 unsigned char *lp = set->ptr;
                 lp = lpAppend(lp, (unsigned char *)str, len);
+                lp = lpShrinkToFit(lp);
                 set->ptr = lp;
                 return 1;
             } else {
-                setTypeConvert(set, OBJ_ENCODING_HT);
+                setTypeConvertAndExpand(set, OBJ_ENCODING_HT,
+                                        intsetLen(set->ptr) + 1, 1);
                 /* The set *was* an intset and this value is not integer
                  * encodable, so dictAdd should always work. */
                 serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK);
@@ -468,6 +479,14 @@ unsigned long setTypeSize(const robj *subject) {
  * to a hash table) is presized to hold the number of elements in the original
  * set. */
 void setTypeConvert(robj *setobj, int enc) {
+    setTypeConvertAndExpand(setobj, enc, setTypeSize(setobj), 1);
+}
+
+/* Converts a set to the specified encoding, pre-sizing it for 'cap' elements.
+ * The 'panic' argument controls whether to panic on OOM (panic=1) or return
+ * C_ERR on OOM (panic=0). If panic=1 is given, this function always returns
+ * C_OK. */
+int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic) {
     setTypeIterator *si;
     serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
                              setobj->encoding != enc);
@@ -477,7 +496,12 @@ void setTypeConvert(robj *setobj, int enc) {
         sds element;
 
         /* Presize the dict to avoid rehashing */
-        dictExpand(d, setTypeSize(setobj));
+        if (panic) {
+            dictExpand(d, cap);
+        } else if (dictTryExpand(d, cap) != DICT_OK) {
+            dictRelease(d);
+            return C_ERR;
+        }
 
         /* To add the elements we extract integers and create redis objects */
         si = setTypeInitIterator(setobj);
@@ -490,8 +514,14 @@ void setTypeConvert(robj *setobj, int enc) {
         setobj->encoding = OBJ_ENCODING_HT;
         setobj->ptr = d;
     } else if (enc == OBJ_ENCODING_LISTPACK) {
-        /* Preallocate the minimum one byte per element */
-        size_t estcap = setTypeSize(setobj);
+        /* Preallocate the minimum two bytes per element (enc/value + backlen) */
+        size_t estcap = cap * 2;
+        if (setobj->encoding == OBJ_ENCODING_INTSET && setTypeSize(setobj) > 0) {
+            /* If we're converting from intset, we have a better estimate. */
+            size_t s1 = lpEstimateBytesRepeatedInteger(intsetMin(setobj->ptr), cap);
+            size_t s2 = lpEstimateBytesRepeatedInteger(intsetMax(setobj->ptr), cap);
+            estcap = max(s1, s2);
+        }
         unsigned char *lp = lpNew(estcap);
         char *str;
         size_t len;
@@ -511,6 +541,7 @@ void setTypeConvert(robj *setobj, int enc) {
     } else {
         serverPanic("Unsupported set conversion");
     }
+    return C_OK;
 }
 
 /* This is a helper function for the COPY command.