From 6fa8e4f7afcbd14748c736c38e5fbc117eaef7ba Mon Sep 17 00:00:00 2001
From: Madelyn Olson <34459052+madolson@users.noreply.github.com>
Date: Tue, 26 Apr 2022 03:25:33 -0700
Subject: Set replicas to panic on disk errors, and optionally panic on
 replication errors (#10504)

* Till now, replicas that were unable to persist, would still execute the commands
  they got from the master, now they'll panic by default, and we add a new
  `replica-ignore-disk-errors` config to change that.
* Till now, when a command failed on a replica or AOF-loading, it only logged a
  warning and a stat, we add a new `propagation-error-behavior` config to allow
  panicking in that state (may become the default one day)

Note that commands that fail on the replica can either indicate a bug that could
cause data inconsistency between the replica and the master, or they could be
in some cases (specifically in previous versions), a result of a command (e.g. EVAL)
that failed on the master, but still had to be propagated to fail on the replica as well.
---
 redis.conf                          | 25 +++++++++++++++++++++
 src/config.c                        | 11 ++++++++-
 src/debug.c                         |  6 +++++
 src/networking.c                    | 15 +++++++++++++
 src/server.c                        | 26 ++++++++++++++++-----
 src/server.h                        | 13 +++++++++++
 tests/assets/default.conf           |  2 ++
 tests/integration/replication-4.tcl | 45 +++++++++++++++++++++++++++++++++++++
 8 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/redis.conf b/redis.conf
index d2074f13f..62062b397 100644
--- a/redis.conf
+++ b/redis.conf
@@ -728,6 +728,31 @@ repl-disable-tcp-nodelay no
 # By default the priority is 100.
 replica-priority 100
 
+# The propagation error behavior controls how Redis will behave when it is
+# unable to handle a command being processed in the replication stream from a master
+# or processed while reading from an AOF file. Errors that occur during propagation
+# are unexpected, and can cause data inconsistency. However, there are edge cases
+# in earlier versions of Redis where it was possible for the server to replicate or persist
+# commands that would fail on future versions. For this reason the default behavior
+# is to ignore such errors and continue processing commands.
+#
+# If an application wants to ensure there is no data divergence, this configuration
+# should be set to 'panic' instead. The value can also be set to 'panic-on-replicas'
+# to only panic when a replica encounters an error on the replication stream. One of
+# these two panic values will become the default value in the future once there are
+# sufficient safety mechanisms in place to prevent false positive crashes.
+#
+# propagation-error-behavior ignore
+
+# Replica ignore disk write errors controls the behavior of a replica when it is
+# unable to persist a write command received from its master to disk. By default,
+# this configuration is set to 'no' and will crash the replica in this condition.
+# It is not recommended to change this default, however in order to be compatible
+# with older versions of Redis this config can be toggled to 'yes' which will just
+# log a warning and execute the write command it got from the master.
+#
+# replica-ignore-disk-write-errors no
+
 # -----------------------------------------------------------------------------
 # By default, Redis Sentinel includes all replicas in its reports. A replica
 # can be excluded from Redis Sentinel's announcements. An unannounced replica
diff --git a/src/config.c b/src/config.c
index d5e347892..61841cb62 100644
--- a/src/config.c
+++ b/src/config.c
@@ -143,6 +143,13 @@ configEnum cluster_preferred_endpoint_type_enum[] = {
     {NULL, 0}
 };
 
+configEnum propagation_error_behavior_enum[] = {
+    {"ignore", PROPAGATION_ERR_BEHAVIOR_IGNORE},
+    {"panic", PROPAGATION_ERR_BEHAVIOR_PANIC},
+    {"panic-on-replicas", PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS},
+    {NULL, 0}
+};
+
 /* Output buffer limits presets. */
 clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
     {0, 0, 0}, /* normal */
@@ -2893,7 +2900,8 @@ standardConfig static_configs[] = {
     createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),
     createBoolConfig("latency-tracking", NULL, MODIFIABLE_CONFIG, server.latency_tracking_enabled, 1, NULL, NULL),
     createBoolConfig("aof-disable-auto-gc", NULL, MODIFIABLE_CONFIG, server.aof_disable_auto_gc, 0, NULL, updateAofAutoGCEnabled),
-    
+    createBoolConfig("replica-ignore-disk-write-errors", NULL, MODIFIABLE_CONFIG, server.repl_ignore_disk_write_error, 0, NULL, NULL),
+
     /* String Configs */
     createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
     createStringConfig("unixsocket", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.unixsocket, NULL, NULL, NULL),
@@ -2934,6 +2942,7 @@ standardConfig static_configs[] = {
     createEnumConfig("enable-debug-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_debug_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL),
     createEnumConfig("enable-module-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_module_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL),
     createEnumConfig("cluster-preferred-endpoint-type", NULL, MODIFIABLE_CONFIG, cluster_preferred_endpoint_type_enum, server.cluster_preferred_endpoint_type, CLUSTER_ENDPOINT_TYPE_IP, NULL, NULL),
+    createEnumConfig("propagation-error-behavior", NULL, MODIFIABLE_CONFIG, propagation_error_behavior_enum, server.propagation_error_behavior, PROPAGATION_ERR_BEHAVIOR_IGNORE, NULL, NULL),
 
     /* Integer configs */
     createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL),
diff --git a/src/debug.c b/src/debug.c
index 4f0e37777..081cb862f 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -416,6 +416,8 @@ void debugCommand(client *c) {
 "    Like HTSTATS but for the hash table stored at <key>'s value.",
 "LOADAOF",
 "    Flush the AOF buffers on disk and reload the AOF in memory.",
+"REPLICATE <string>",
+"    Replicates the provided string to replicas, allowing data divergence.",
 #ifdef USE_JEMALLOC
 "MALLCTL <key> [<val>]",
 "    Get or set a malloc tuning integer.",
@@ -849,6 +851,10 @@ NULL
     {
         server.aof_flush_sleep = atoi(c->argv[2]->ptr);
         addReply(c,shared.ok);
+    } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc >= 3) {
+        replicationFeedSlaves(server.slaves, server.slaveseldb,
+                c->argv + 2, c->argc - 2);
+        addReply(c,shared.ok);
     } else if (!strcasecmp(c->argv[1]->ptr,"error") && c->argc == 3) {
         sds errstr = sdsnewlen("-",1);
 
diff --git a/src/networking.c b/src/networking.c
index e79b18d70..0664e2bf0 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -539,6 +539,21 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) {
             showLatestBacklog();
         }
         server.stat_unexpected_error_replies++;
+
+        /* Based off the propagation error behavior, check if we need to panic here. There
+         * are currently two checked cases:
+         * * If this command was from our master and we are not a writable replica.
+         * * We are reading from an AOF file. */
+        int panic_in_replicas = (ctype == CLIENT_TYPE_MASTER && server.repl_slave_ro)
+            && (server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC ||
+            server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS);
+        int panic_in_aof = c->id == CLIENT_ID_AOF 
+            && server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC;
+        if (panic_in_replicas || panic_in_aof) {
+            serverPanic("This %s panicked sending an error to its %s"
+                " after processing the command '%s'",
+                from, to, cmdname ? cmdname : "<unknown>");
+        }
     }
 }
 
diff --git a/src/server.c b/src/server.c
index c603af02e..dbfe5aaea 100644
--- a/src/server.c
+++ b/src/server.c
@@ -3755,15 +3755,29 @@ int processCommand(client *c) {
     if (server.tracking_clients) trackingLimitUsedSlots();
 
     /* Don't accept write commands if there are problems persisting on disk
-     * unless coming from our master. */
+     * unless coming from our master, in which case check the replica ignore
+     * disk write error config to either log or crash. */
     int deny_write_type = writeCommandsDeniedByDiskError();
     if (deny_write_type != DISK_ERROR_TYPE_NONE &&
-        !obey_client &&
-        (is_write_command ||c->cmd->proc == pingCommand))
+        (is_write_command || c->cmd->proc == pingCommand))
     {
-        sds err = writeCommandsGetDiskErrorMessage(deny_write_type);
-        rejectCommandSds(c, err);
-        return C_OK;
+        if (obey_client) {
+            if (!server.repl_ignore_disk_write_error && c->cmd->proc != pingCommand) {
+                serverPanic("Replica was unable to write command to disk.");
+            } else {
+                static mstime_t last_log_time_ms = 0;
+                const mstime_t log_interval_ms = 10000;
+                if (server.mstime > last_log_time_ms + log_interval_ms) {
+                    last_log_time_ms = server.mstime;
+                    serverLog(LL_WARNING, "Replica is applying a command even though "
+                                          "it is unable to write to disk.");
+                }
+            }
+        } else {
+            sds err = writeCommandsGetDiskErrorMessage(deny_write_type);
+            rejectCommandSds(c, err);
+            return C_OK;
+        }
     }
 
     /* Don't accept write commands if there are not enough good slaves and
diff --git a/src/server.h b/src/server.h
index ef0215fe0..b26ec7b3b 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1325,6 +1325,15 @@ struct redisMemOverhead {
     } *db;
 };
 
+/* Replication error behavior determines the replica behavior
+ * when it receives an error over the replication stream. In
+ * either case the error is logged. */
+enum {
+    PROPAGATION_ERR_BEHAVIOR_IGNORE = 0,
+    PROPAGATION_ERR_BEHAVIOR_PANIC,
+    PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS
+} replicationErrorBehavior;
+
 /* This structure can be optionally passed to RDB save/load functions in
  * order to implement additional functionalities, by storing and loading
  * metadata to the RDB file.
@@ -1772,6 +1781,10 @@ struct redisServer {
     int replica_announced;          /* If true, replica is announced by Sentinel */
     int slave_announce_port;        /* Give the master this listening port. */
     char *slave_announce_ip;        /* Give the master this ip address. */
+    int propagation_error_behavior; /* Configures the behavior of the replica
+                                     * when it receives an error on the replication stream */
+    int repl_ignore_disk_write_error;   /* Configures whether replicas panic when unable to
+                                         * persist writes to AOF. */
     /* The following two fields is where we store master PSYNC replid/offset
      * while the PSYNC is in progress. At the end we'll copy the fields into
      * the server->master client structure. */
diff --git a/tests/assets/default.conf b/tests/assets/default.conf
index 42297903f..227ab5e7f 100644
--- a/tests/assets/default.conf
+++ b/tests/assets/default.conf
@@ -30,3 +30,5 @@ activerehashing yes
 enable-protected-configs yes
 enable-debug-command yes
 enable-module-command yes
+
+propagation-error-behavior panic
\ No newline at end of file
diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl
index 281d5a8eb..9f4281f7c 100644
--- a/tests/integration/replication-4.tcl
+++ b/tests/integration/replication-4.tcl
@@ -195,3 +195,48 @@ start_server {tags {"repl external:skip"}} {
         }
     }
 }
+
+start_server {tags {"repl external:skip"}} {
+    start_server {} {
+        set master [srv -1 client]
+        set master_host [srv -1 host]
+        set master_port [srv -1 port]
+        set replica [srv 0 client]
+
+        test {First server should have role slave after SLAVEOF} {
+            $replica slaveof $master_host $master_port
+            wait_for_condition 50 100 {
+                [s 0 role] eq {slave}
+            } else {
+                fail "Replication not started."
+            }
+            wait_for_sync $replica
+        }
+
+        test {Data divergence can happen under default conditions} {       
+            $replica config set propagation-error-behavior ignore     
+            $master debug replicate fake-command-1
+
+            # Wait for replication to normalize
+            $master set foo bar2
+            $master wait 1 2000
+
+            # Make sure we triggered the error, by finding the critical
+            # message and the fake command.
+            assert_equal [count_log_message 0 "fake-command-1"] 1
+            assert_equal [count_log_message 0 "== CRITICAL =="] 1
+        }
+
+        test {Data divergence is allowed on writable replicas} {            
+            $replica config set replica-read-only no
+            $replica set number2 foo
+            $master incrby number2 1
+            $master wait 1 2000
+
+            assert_equal [$master get number2] 1
+            assert_equal [$replica get number2] foo
+
+            assert_equal [count_log_message 0 "incrby"] 1
+        }
+    }
+}
-- 
cgit v1.2.1