summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOran Agra <oran@redislabs.com>2022-08-24 19:39:15 +0300
committerOran Agra <oran@redislabs.com>2022-09-21 22:42:01 +0300
commita2a28b8034df14ad2c582117a4e62c589256bc16 (patch)
treea8b06531a41a59d34ca75c24ae3cf6331fed9374
parent1e2876e016438b6c688710234b70491927a5b29d (diff)
downloadredis-a2a28b8034df14ad2c582117a4e62c589256bc16.tar.gz
Fix assertion when a key is lazy expired during cluster key migration (#11176)
Redis 7.0 has #9890 which added an assertion when the propagation queue was not flushed and we got to beforeSleep. But it turns out that when processCommands calls getNodeByQuery and decides to reject the command, it can lead to a key that was lazy expired and is deleted without later flushing the propagation queue. This change prevents lazy expiry from deleting the key at this stage (not as part of a command being processed in `call`) (cherry picked from commit c789fb0aa7e4a634c66db8113699f06fd995c4d9)
-rw-r--r--src/cluster.c2
-rw-r--r--src/db.c33
-rw-r--r--src/server.h1
-rw-r--r--tests/test_helper.tcl1
-rw-r--r--tests/unit/cluster/misc.tcl16
5 files changed, 45 insertions, 8 deletions
diff --git a/src/cluster.c b/src/cluster.c
index 490f25b5b..54e58e28b 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -6702,7 +6702,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
* slot migration, the channel will be served from the source
* node until the migration completes with CLUSTER SETSLOT <slot>
* NODE <node-id>. */
- int flags = LOOKUP_NOTOUCH | LOOKUP_NOSTATS | LOOKUP_NONOTIFY;
+ int flags = LOOKUP_NOTOUCH | LOOKUP_NOSTATS | LOOKUP_NONOTIFY | LOOKUP_NOEXPIRE;
if ((migrating_slot || importing_slot) && !is_pubsubshard)
{
if (lookupKeyReadWithFlags(&server.db[0], thiskey, flags) == NULL) missing_keys++;
diff --git a/src/db.c b/src/db.c
index 36de2aa0a..3fd4938af 100644
--- a/src/db.c
+++ b/src/db.c
@@ -41,7 +41,11 @@
* C-level DB API
*----------------------------------------------------------------------------*/
-int expireIfNeeded(redisDb *db, robj *key, int force_delete_expired);
+/* Flags for expireIfNeeded */
+#define EXPIRE_FORCE_DELETE_EXPIRED 1
+#define EXPIRE_AVOID_DELETE_EXPIRED 2
+
+int expireIfNeeded(redisDb *db, robj *key, int flags);
int keyIsExpired(redisDb *db, robj *key);
/* Update LFU when an object is accessed.
@@ -72,6 +76,8 @@ void updateLFU(robj *val) {
* LOOKUP_NOSTATS: Don't increment key hits/misses counters.
* LOOKUP_WRITE: Prepare the key for writing (delete expired keys even on
* replicas, use separate keyspace stats and events (TODO)).
+ * LOOKUP_NOEXPIRE: Perform expiration check, but avoid deleting the key,
+ * so that we don't have to propagate the deletion.
*
* Note: this function also returns NULL if the key is logically expired but
* still existing, in case this is a replica and the LOOKUP_WRITE is not set.
@@ -92,8 +98,12 @@ robj *lookupKey(redisDb *db, robj *key, int flags) {
* command, since the command may trigger events that cause modules to
* perform additional writes. */
int is_ro_replica = server.masterhost && server.repl_slave_ro;
- int force_delete_expired = flags & LOOKUP_WRITE && !is_ro_replica;
- if (expireIfNeeded(db, key, force_delete_expired)) {
+ int expire_flags = 0;
+ if (flags & LOOKUP_WRITE && !is_ro_replica)
+ expire_flags |= EXPIRE_FORCE_DELETE_EXPIRED;
+ if (flags & LOOKUP_NOEXPIRE)
+ expire_flags |= EXPIRE_AVOID_DELETE_EXPIRED;
+ if (expireIfNeeded(db, key, expire_flags)) {
/* The key is no longer valid. */
val = NULL;
}
@@ -1657,13 +1667,17 @@ int keyIsExpired(redisDb *db, robj *key) {
*
* On replicas, this function does not delete expired keys by default, but
* it still returns 1 if the key is logically expired. To force deletion
- * of logically expired keys even on replicas, set force_delete_expired to
- * a non-zero value. Note though that if the current client is executing
+ * of logically expired keys even on replicas, use the EXPIRE_FORCE_DELETE_EXPIRED
+ * flag. Note though that if the current client is executing
* replicated commands from the master, keys are never considered expired.
*
+ * On the other hand, if you just want expiration check, but need to avoid
+ * the actual key deletion and propagation of the deletion, use the
+ * EXPIRE_AVOID_DELETE_EXPIRED flag.
+ *
* The return value of the function is 0 if the key is still valid,
* otherwise the function returns 1 if the key is expired. */
-int expireIfNeeded(redisDb *db, robj *key, int force_delete_expired) {
+int expireIfNeeded(redisDb *db, robj *key, int flags) {
if (!keyIsExpired(db,key)) return 0;
/* If we are running in the context of a replica, instead of
@@ -1681,9 +1695,14 @@ int expireIfNeeded(redisDb *db, robj *key, int force_delete_expired) {
* expired. */
if (server.masterhost != NULL) {
if (server.current_client == server.master) return 0;
- if (!force_delete_expired) return 1;
+ if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return 1;
}
+ /* In some cases we're explicitly instructed to return an indication of a
+ * missing key without actually deleting it, even on masters. */
+ if (flags & EXPIRE_AVOID_DELETE_EXPIRED)
+ return 1;
+
/* If clients are paused, we keep the current dataset constant,
* but return to the client what we believe is the right state. Typically,
* at the end of the pause we will properly expire the key OR we will
diff --git a/src/server.h b/src/server.h
index f33eaa5ce..bac67fe97 100644
--- a/src/server.h
+++ b/src/server.h
@@ -3099,6 +3099,7 @@ int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
#define LOOKUP_NONOTIFY (1<<1) /* Don't trigger keyspace event on key misses. */
#define LOOKUP_NOSTATS (1<<2) /* Don't update keyspace hits/misses counters. */
#define LOOKUP_WRITE (1<<3) /* Delete expired keys even in replicas. */
+#define LOOKUP_NOEXPIRE (1<<4) /* Avoid deleting lazy expired keys. */
void dbAdd(redisDb *db, robj *key, robj *val);
int dbAddRDBLoad(redisDb *db, sds key, robj *val);
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 2dcccb8cf..f6b04c89c 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -95,6 +95,7 @@ set ::all_tests {
unit/violations
unit/replybufsize
unit/cluster-scripting
+ unit/cluster/misc
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
diff --git a/tests/unit/cluster/misc.tcl b/tests/unit/cluster/misc.tcl
new file mode 100644
index 000000000..35308b81a
--- /dev/null
+++ b/tests/unit/cluster/misc.tcl
@@ -0,0 +1,16 @@
+start_cluster 2 2 {tags {external:skip cluster}} {
+ test {Key lazy expires during key migration} {
+ R 0 DEBUG SET-ACTIVE-EXPIRE 0
+
+ set key_slot [R 0 CLUSTER KEYSLOT FOO]
+ R 0 set FOO BAR PX 10
+ set src_id [R 0 CLUSTER MYID]
+ set trg_id [R 1 CLUSTER MYID]
+ R 0 CLUSTER SETSLOT $key_slot MIGRATING $trg_id
+ R 1 CLUSTER SETSLOT $key_slot IMPORTING $src_id
+ after 11
+ assert_error {ASK*} {R 0 GET FOO}
+ R 0 ping
+ } {PONG}
+}
+