From 6fa8e4f7afcbd14748c736c38e5fbc117eaef7ba Mon Sep 17 00:00:00 2001 From: Madelyn Olson <34459052+madolson@users.noreply.github.com> Date: Tue, 26 Apr 2022 03:25:33 -0700 Subject: Set replicas to panic on disk errors, and optionally panic on replication errors (#10504) * Till now, replicas that were unable to persist, would still execute the commands they got from the master, now they'll panic by default, and we add a new `replica-ignore-disk-errors` config to change that. * Till now, when a command failed on a replica or AOF-loading, it only logged a warning and a stat, we add a new `propagation-error-behavior` config to allow panicking in that state (may become the default one day) Note that commands that fail on the replica can either indicate a bug that could cause data inconsistency between the replica and the master, or they could be in some cases (specifically in previous versions), a result of a command (e.g. EVAL) that failed on the master, but still had to be propagated to fail on the replica as well. --- redis.conf | 25 +++++++++++++++++++++ src/config.c | 11 ++++++++- src/debug.c | 6 +++++ src/networking.c | 15 +++++++++++++ src/server.c | 26 ++++++++++++++++----- src/server.h | 13 +++++++++++ tests/assets/default.conf | 2 ++ tests/integration/replication-4.tcl | 45 +++++++++++++++++++++++++++++++++++++ 8 files changed, 136 insertions(+), 7 deletions(-) diff --git a/redis.conf b/redis.conf index d2074f13f..62062b397 100644 --- a/redis.conf +++ b/redis.conf @@ -728,6 +728,31 @@ repl-disable-tcp-nodelay no # By default the priority is 100. replica-priority 100 +# The propagation error behavior controls how Redis will behave when it is +# unable to handle a command being processed in the replication stream from a master +# or processed while reading from an AOF file. Errors that occur during propagation +# are unexpected, and can cause data inconsistency. However, there are edge cases +# in earlier versions of Redis where it was possible for the server to replicate or persist +# commands that would fail on future versions. For this reason the default behavior +# is to ignore such errors and continue processing commands. +# +# If an application wants to ensure there is no data divergence, this configuration +# should be set to 'panic' instead. The value can also be set to 'panic-on-replicas' +# to only panic when a replica encounters an error on the replication stream. One of +# these two panic values will become the default value in the future once there are +# sufficient safety mechanisms in place to prevent false positive crashes. +# +# propagation-error-behavior ignore + +# Replica ignore disk write errors controls the behavior of a replica when it is +# unable to persist a write command received from its master to disk. By default, +# this configuration is set to 'no' and will crash the replica in this condition. +# It is not recommended to change this default, however in order to be compatible +# with older versions of Redis this config can be toggled to 'yes' which will just +# log a warning and execute the write command it got from the master. +# +# replica-ignore-disk-write-errors no + # ----------------------------------------------------------------------------- # By default, Redis Sentinel includes all replicas in its reports. A replica # can be excluded from Redis Sentinel's announcements. An unannounced replica diff --git a/src/config.c b/src/config.c index d5e347892..61841cb62 100644 --- a/src/config.c +++ b/src/config.c @@ -143,6 +143,13 @@ configEnum cluster_preferred_endpoint_type_enum[] = { {NULL, 0} }; +configEnum propagation_error_behavior_enum[] = { + {"ignore", PROPAGATION_ERR_BEHAVIOR_IGNORE}, + {"panic", PROPAGATION_ERR_BEHAVIOR_PANIC}, + {"panic-on-replicas", PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS}, + {NULL, 0} +}; + /* Output buffer limits presets. */ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {0, 0, 0}, /* normal */ @@ -2893,7 +2900,8 @@ standardConfig static_configs[] = { createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL), createBoolConfig("latency-tracking", NULL, MODIFIABLE_CONFIG, server.latency_tracking_enabled, 1, NULL, NULL), createBoolConfig("aof-disable-auto-gc", NULL, MODIFIABLE_CONFIG, server.aof_disable_auto_gc, 0, NULL, updateAofAutoGCEnabled), - + createBoolConfig("replica-ignore-disk-write-errors", NULL, MODIFIABLE_CONFIG, server.repl_ignore_disk_write_error, 0, NULL, NULL), + /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), createStringConfig("unixsocket", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.unixsocket, NULL, NULL, NULL), @@ -2934,6 +2942,7 @@ standardConfig static_configs[] = { createEnumConfig("enable-debug-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_debug_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL), createEnumConfig("enable-module-command", NULL, IMMUTABLE_CONFIG, protected_action_enum, server.enable_module_cmd, PROTECTED_ACTION_ALLOWED_NO, NULL, NULL), createEnumConfig("cluster-preferred-endpoint-type", NULL, MODIFIABLE_CONFIG, cluster_preferred_endpoint_type_enum, server.cluster_preferred_endpoint_type, CLUSTER_ENDPOINT_TYPE_IP, NULL, NULL), + createEnumConfig("propagation-error-behavior", NULL, MODIFIABLE_CONFIG, propagation_error_behavior_enum, server.propagation_error_behavior, PROPAGATION_ERR_BEHAVIOR_IGNORE, NULL, NULL), /* Integer configs */ createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, server.dbnum, 16, INTEGER_CONFIG, NULL, NULL), diff --git a/src/debug.c b/src/debug.c index 4f0e37777..081cb862f 100644 --- a/src/debug.c +++ b/src/debug.c @@ -416,6 +416,8 @@ void debugCommand(client *c) { " Like HTSTATS but for the hash table stored at 's value.", "LOADAOF", " Flush the AOF buffers on disk and reload the AOF in memory.", +"REPLICATE ", +" Replicates the provided string to replicas, allowing data divergence.", #ifdef USE_JEMALLOC "MALLCTL []", " Get or set a malloc tuning integer.", @@ -849,6 +851,10 @@ NULL { server.aof_flush_sleep = atoi(c->argv[2]->ptr); addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"replicate") && c->argc >= 3) { + replicationFeedSlaves(server.slaves, server.slaveseldb, + c->argv + 2, c->argc - 2); + addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"error") && c->argc == 3) { sds errstr = sdsnewlen("-",1); diff --git a/src/networking.c b/src/networking.c index e79b18d70..0664e2bf0 100644 --- a/src/networking.c +++ b/src/networking.c @@ -539,6 +539,21 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) { showLatestBacklog(); } server.stat_unexpected_error_replies++; + + /* Based off the propagation error behavior, check if we need to panic here. There + * are currently two checked cases: + * * If this command was from our master and we are not a writable replica. + * * We are reading from an AOF file. */ + int panic_in_replicas = (ctype == CLIENT_TYPE_MASTER && server.repl_slave_ro) + && (server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC || + server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS); + int panic_in_aof = c->id == CLIENT_ID_AOF + && server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC; + if (panic_in_replicas || panic_in_aof) { + serverPanic("This %s panicked sending an error to its %s" + " after processing the command '%s'", + from, to, cmdname ? cmdname : ""); + } } } diff --git a/src/server.c b/src/server.c index c603af02e..dbfe5aaea 100644 --- a/src/server.c +++ b/src/server.c @@ -3755,15 +3755,29 @@ int processCommand(client *c) { if (server.tracking_clients) trackingLimitUsedSlots(); /* Don't accept write commands if there are problems persisting on disk - * unless coming from our master. */ + * unless coming from our master, in which case check the replica ignore + * disk write error config to either log or crash. */ int deny_write_type = writeCommandsDeniedByDiskError(); if (deny_write_type != DISK_ERROR_TYPE_NONE && - !obey_client && - (is_write_command ||c->cmd->proc == pingCommand)) + (is_write_command || c->cmd->proc == pingCommand)) { - sds err = writeCommandsGetDiskErrorMessage(deny_write_type); - rejectCommandSds(c, err); - return C_OK; + if (obey_client) { + if (!server.repl_ignore_disk_write_error && c->cmd->proc != pingCommand) { + serverPanic("Replica was unable to write command to disk."); + } else { + static mstime_t last_log_time_ms = 0; + const mstime_t log_interval_ms = 10000; + if (server.mstime > last_log_time_ms + log_interval_ms) { + last_log_time_ms = server.mstime; + serverLog(LL_WARNING, "Replica is applying a command even though " + "it is unable to write to disk."); + } + } + } else { + sds err = writeCommandsGetDiskErrorMessage(deny_write_type); + rejectCommandSds(c, err); + return C_OK; + } } /* Don't accept write commands if there are not enough good slaves and diff --git a/src/server.h b/src/server.h index ef0215fe0..b26ec7b3b 100644 --- a/src/server.h +++ b/src/server.h @@ -1325,6 +1325,15 @@ struct redisMemOverhead { } *db; }; +/* Replication error behavior determines the replica behavior + * when it receives an error over the replication stream. In + * either case the error is logged. */ +enum { + PROPAGATION_ERR_BEHAVIOR_IGNORE = 0, + PROPAGATION_ERR_BEHAVIOR_PANIC, + PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS +} replicationErrorBehavior; + /* This structure can be optionally passed to RDB save/load functions in * order to implement additional functionalities, by storing and loading * metadata to the RDB file. @@ -1772,6 +1781,10 @@ struct redisServer { int replica_announced; /* If true, replica is announced by Sentinel */ int slave_announce_port; /* Give the master this listening port. */ char *slave_announce_ip; /* Give the master this ip address. */ + int propagation_error_behavior; /* Configures the behavior of the replica + * when it receives an error on the replication stream */ + int repl_ignore_disk_write_error; /* Configures whether replicas panic when unable to + * persist writes to AOF. */ /* The following two fields is where we store master PSYNC replid/offset * while the PSYNC is in progress. At the end we'll copy the fields into * the server->master client structure. */ diff --git a/tests/assets/default.conf b/tests/assets/default.conf index 42297903f..227ab5e7f 100644 --- a/tests/assets/default.conf +++ b/tests/assets/default.conf @@ -30,3 +30,5 @@ activerehashing yes enable-protected-configs yes enable-debug-command yes enable-module-command yes + +propagation-error-behavior panic \ No newline at end of file diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl index 281d5a8eb..9f4281f7c 100644 --- a/tests/integration/replication-4.tcl +++ b/tests/integration/replication-4.tcl @@ -195,3 +195,48 @@ start_server {tags {"repl external:skip"}} { } } } + +start_server {tags {"repl external:skip"}} { + start_server {} { + set master [srv -1 client] + set master_host [srv -1 host] + set master_port [srv -1 port] + set replica [srv 0 client] + + test {First server should have role slave after SLAVEOF} { + $replica slaveof $master_host $master_port + wait_for_condition 50 100 { + [s 0 role] eq {slave} + } else { + fail "Replication not started." + } + wait_for_sync $replica + } + + test {Data divergence can happen under default conditions} { + $replica config set propagation-error-behavior ignore + $master debug replicate fake-command-1 + + # Wait for replication to normalize + $master set foo bar2 + $master wait 1 2000 + + # Make sure we triggered the error, by finding the critical + # message and the fake command. + assert_equal [count_log_message 0 "fake-command-1"] 1 + assert_equal [count_log_message 0 "== CRITICAL =="] 1 + } + + test {Data divergence is allowed on writable replicas} { + $replica config set replica-read-only no + $replica set number2 foo + $master incrby number2 1 + $master wait 1 2000 + + assert_equal [$master get number2] 1 + assert_equal [$replica get number2] foo + + assert_equal [count_log_message 0 "incrby"] 1 + } + } +} -- cgit v1.2.1