summary | refs | log | tree | commit | diff
path: root/tests/cluster
diff options
context:
space:
mode:
author: Madelyn Olson <34459052+madolson@users.noreply.github.com> 2022-03-30 22:15:00 -0700
committer: GitHub <noreply@github.com> 2022-03-31 08:15:00 +0300
commit: e81bd15e992362062e2d60b614407667431cf248 (patch)
tree: 01b2dbe42e29228992dd7dcd5e36e2a4bbae19bc /tests/cluster
parent: a3075ca4fea92b672689c74dc63c3550cd626a26 (diff)
download: redis-e81bd15e992362062e2d60b614407667431cf248.tar.gz
Prevent replica failover during manual takeover test (#10499)
During 11-manual-takeover.tcl, if killing the instances happens too slowly, one of the replicas might be able to promote itself. I'm not sure why it was slow, but it was observed taking 6 seconds, which is enough time to hold an election. I was able to verify the error locally by adding a small delay (1 second) during ASAN CI. The fix is simply to disable automated failover on the replicas until all of the killed primaries are confirmed dead.
Diffstat (limited to 'tests/cluster')
-rw-r--r-- tests/cluster/tests/11-manual-takeover.tcl 24
1 files changed, 18 insertions, 6 deletions
diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl
index f567c6962..78a0f858b 100644
--- a/tests/cluster/tests/11-manual-takeover.tcl
+++ b/tests/cluster/tests/11-manual-takeover.tcl
@@ -14,20 +14,32 @@ test "Cluster is writable" {
cluster_write_test 0
}
+# For this test, disable replica failover until
+# all of the primaries are confirmed killed. Otherwise
+# there might be enough time to elect a replica.
+set replica_ids { 5 6 7 }
+foreach id $replica_ids {
+ R $id config set cluster-replica-no-failover yes
+}
+
test "Killing majority of master nodes" {
kill_instance redis 0
kill_instance redis 1
kill_instance redis 2
}
+foreach id $replica_ids {
+ R $id config set cluster-replica-no-failover no
+}
+
test "Cluster should eventually be down" {
assert_cluster_state fail
}
test "Use takeover to bring slaves back" {
- R 5 cluster failover takeover
- R 6 cluster failover takeover
- R 7 cluster failover takeover
+ foreach id $replica_ids {
+ R $id cluster failover takeover
+ }
}
test "Cluster should eventually be up again" {
@@ -39,9 +51,9 @@ test "Cluster is writable" {
}
test "Instance #5, #6, #7 are now masters" {
- assert {[RI 5 role] eq {master}}
- assert {[RI 6 role] eq {master}}
- assert {[RI 7 role] eq {master}}
+ foreach id $replica_ids {
+ assert {[RI $id role] eq {master}}
+ }
}
test "Restarting the previously killed master nodes" {