summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMadelyn Olson <34459052+madolson@users.noreply.github.com>2023-05-08 16:11:20 -0700
committerGitHub <noreply@github.com>2023-05-08 16:11:20 -0700
commita129a601818d88a2f2aa9d954afc8afa43030f7e (patch)
tree85de9f99c4cb77621a14ef97c3f21439aba5fb20
parent51af17e3cf62cf77258c35509544a9b273327d2a (diff)
downloadredis-a129a601818d88a2f2aa9d954afc8afa43030f7e.tar.gz
Minor performance improvement to SADD and HSET (#12019)
For sets and hashes that will eventually be stored as the hash encoding, it's much faster to immediately convert them to their hash encoding and then perform the insertions since it avoids the O(N) search and frequent reallocations. This change checks the number of arguments in the incoming command, and converts the data-structure if the number of new entries exceeds the listpack-max-entries configuration. This can cause us to over-allocate memory if their are duplicate entries in the input, which is unexpected. unstable Summary: throughput summary: 805.54 requests per second latency summary (msec): avg min p50 p95 p99 max 61.908 25.680 68.351 73.279 75.967 79.295 hset-improvement Summary: throughput summary: 4701.46 requests per second latency summary (msec): avg min p50 p95 p99 max 10.546 0.832 11.959 12.471 13.119 14.967
-rw-r--r--src/server.h2
-rw-r--r--src/t_hash.c10
-rw-r--r--src/t_set.c34
3 files changed, 39 insertions, 7 deletions
diff --git a/src/server.h b/src/server.h
index bced3dbb8..af6847f13 100644
--- a/src/server.h
+++ b/src/server.h
@@ -3069,7 +3069,7 @@ void dismissMemoryInChild(void);
int restartServer(int flags, mstime_t delay);
/* Set data type */
-robj *setTypeCreate(sds value);
+robj *setTypeCreate(sds value, size_t size_hint);
int setTypeAdd(robj *subject, sds value);
int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds);
int setTypeRemove(robj *subject, sds value);
diff --git a/src/t_hash.c b/src/t_hash.c
index 6005f4442..f7d5af649 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -43,6 +43,16 @@ void hashTypeTryConversion(robj *o, robj **argv, int start, int end) {
if (o->encoding != OBJ_ENCODING_LISTPACK) return;
+ /* We guess that most of the values in the input are unique, so
+ * if there are enough arguments we create a pre-sized hash, which
+ * might overallocate memory if their are duplicates. */
+ size_t new_fields = (end - start + 1) / 2;
+ if (new_fields > server.hash_max_listpack_entries) {
+ hashTypeConvert(o, OBJ_ENCODING_HT);
+ dictExpand(o->ptr, new_fields);
+ return;
+ }
+
for (i = start; i <= end; i++) {
if (!sdsEncodedObject(argv[i]))
continue;
diff --git a/src/t_set.c b/src/t_set.c
index ba32ae51c..56ad95a53 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -39,11 +39,31 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
- * hash table. */
-robj *setTypeCreate(sds value) {
- if (isSdsRepresentableAsLongLong(value,NULL) == C_OK)
+ * hash table.
+ *
+ * The size hint indicates approximately how many items will be added which is
+ * used to determine the initial representation. */
+robj *setTypeCreate(sds value, size_t size_hint) {
+ if (isSdsRepresentableAsLongLong(value,NULL) == C_OK && size_hint < server.set_max_intset_entries)
return createIntsetObject();
- return createSetListpackObject();
+ if (size_hint < server.set_max_listpack_entries)
+ return createSetListpackObject();
+
+ /* We may oversize the set by using the hint if the hint is not accurate,
+ * but we will assume this is accpetable to maximize performance. */
+ robj *o = createSetObject();
+ dictExpand(o->ptr, size_hint);
+ return o;
+}
+
+/* Check if the existing set should be converted to another encoding based off the
+ * the size hint. */
+void setTypeMaybeConvert(robj *set, size_t size_hint) {
+ if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint >= server.set_max_listpack_entries)
+ || (set->encoding == OBJ_ENCODING_INTSET && size_hint >= server.set_max_intset_entries))
+ {
+ setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1);
+ }
}
/* Return the maximum number of entries to store in an intset. */
@@ -590,8 +610,10 @@ void saddCommand(client *c) {
if (checkType(c,set,OBJ_SET)) return;
if (set == NULL) {
- set = setTypeCreate(c->argv[2]->ptr);
+ set = setTypeCreate(c->argv[2]->ptr, c->argc - 2);
dbAdd(c->db,c->argv[1],set);
+ } else {
+ setTypeMaybeConvert(set, c->argc - 2);
}
for (j = 2; j < c->argc; j++) {
@@ -672,7 +694,7 @@ void smoveCommand(client *c) {
/* Create the destination set when it doesn't exist */
if (!dstset) {
- dstset = setTypeCreate(ele->ptr);
+ dstset = setTypeCreate(ele->ptr, 1);
dbAdd(c->db,c->argv[2],dstset);
}