author    Salvatore Sanfilippo <antirez@gmail.com>   2018-07-03 12:36:53 +0200
committer GitHub <noreply@github.com>                2018-07-03 12:36:53 +0200
commit    02e385169eb9a0c5710062211e6916d7f8aa38c0 (patch)
tree      9dd72ca78dd7819d829edce2a5b5db5ef5b33662
parent    2edcafb35d3cc2dcc516ba68bb173f7765424eea (diff)
parent    2e167f7d0e9abb5c49f8f0fb22a3fe2340ad0e31 (diff)
download  redis-02e385169eb9a0c5710062211e6916d7f8aa38c0.tar.gz
Merge pull request #5081 from trevor211/fixClusterFailover
cluster failover bug
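
Before this fix, replicationSetMaster() cleared server.repl_down_since as
soon as a node was pointed at a new master (via SLAVEOF or a cluster
reconfiguration). If that master then failed before the initial sync
completed, the slave's link-down age, which the cluster failover logic
derives from repl_down_since, looked enormous, so the slave could never
pass the data-age validity check and no failover happened. Moving the
reset into replicationCreateMasterClient() defers it until the replication
link is actually established. (See the sketch after the replication.c
hunks below.)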
-rw-r--r--  src/replication.c                          |  2 +-
-rw-r--r--  tests/cluster/tests/05-slave-selection.tcl | 77 ++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/src/replication.c b/src/replication.c
index 00c2a7199..d5b5dc23c 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -1087,6 +1087,7 @@ void replicationCreateMasterClient(int fd, int dbid) {
if (server.master->reploff == -1)
server.master->flags |= CLIENT_PRE_PSYNC;
if (dbid != -1) selectDb(server.master,dbid);
+ server.repl_down_since = 0;
}

void restartAOF() {
@@ -1950,7 +1951,6 @@ void replicationSetMaster(char *ip, int port) {
* our own parameters, to later PSYNC with the new master. */
if (was_master) replicationCacheMasterUsingMyself();
server.repl_state = REPL_STATE_CONNECT;
- server.repl_down_since = 0;
}

/* Cancel replication, setting the instance as a master itself. */
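
server.repl_down_since, moved by the two hunks above, feeds the data-age
check that decides whether a slave may start a failover election at all.
A condensed C sketch of that check, paraphrased from
clusterHandleSlaveFailover() in cluster.c of this era (the field names are
real; unrelated checks are elided, so this is not verbatim):

mstime_t data_age;

if (server.repl_state == REPL_STATE_CONNECTED) {
    /* Link is up: age is the time since we last heard from the master. */
    data_age = (mstime_t)(server.unixtime - server.master->lastinteraction)
               * 1000;
} else {
    /* Link is down: age is the time since the link went down. Before this
     * fix, repl_down_since could be 0 here (reset at SLAVEOF time while
     * the sync never completed), so data_age became roughly the whole
     * Unix epoch and the slave was disqualified forever. */
    data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000;
}

/* A slave whose data looks too old refuses to request votes. */
if (server.cluster_slave_validity_factor &&
    data_age > (((mstime_t)server.repl_ping_slave_period * 1000) +
                (server.cluster_node_timeout *
                 server.cluster_slave_validity_factor)))
    return; /* Too stale to fail over. */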
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
index 6efedce5d..bcb0fa1ea 100644
--- a/tests/cluster/tests/05-slave-selection.tcl
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -92,3 +92,80 @@ test "Node #10 should eventually replicate node #5" {
        fail "#10 didn't become slave of #5"
}
}
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 3 masters and 15 slaves, so that we have 5
+# slaves for each master.
+test "Create a 3 nodes cluster" {
+ create_cluster 3 15
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "The first master has actually 5 slaves" {
+ assert {[llength [lindex [R 0 role] 2]] == 5}
+}
+
+test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
+ set port0 [get_instance_attrib redis 0 port]
+ assert {[lindex [R 3 role] 2] == $port0}
+ assert {[lindex [R 6 role] 2] == $port0}
+ assert {[lindex [R 9 role] 2] == $port0}
+ assert {[lindex [R 12 role] 2] == $port0}
+ assert {[lindex [R 15 role] 2] == $port0}
+}
+
+test {Instance #3, #6, #9, #12 and #15 synced with the master} {
+ wait_for_condition 1000 50 {
+ [RI 3 master_link_status] eq {up} &&
+ [RI 6 master_link_status] eq {up} &&
+ [RI 9 master_link_status] eq {up} &&
+ [RI 12 master_link_status] eq {up} &&
+ [RI 15 master_link_status] eq {up}
+ } else {
+ fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
+ }
+}
+
+proc master_detected {instances} {
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ return true
+ }
+ }
+
+ return false
+}
+
+test "New Master down consecutively" {
+ set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
+
+ set loops [expr {[dict size $instances]-1}]
+ for {set i 0} {$i < $loops} {incr i} {
+ set master_id -1
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ set master_id $instance
+                break
+ }
+ }
+
+        if {$master_id == -1} {
+ fail "no master detected, #loop $i"
+ }
+
+ set instances [dict remove $instances $master_id]
+
+ kill_instance redis $master_id
+ wait_for_condition 1000 50 {
+ [master_detected $instances]
+ } else {
+            fail "No failover detected when master $master_id fails"
+ }
+
+ assert_cluster_state ok
+ }
+}
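
This suite lives under tests/cluster; with the stock harness it is
normally run from the repository root with ./runtest-cluster (a thin
wrapper around tests/cluster/run.tcl), which picks up this
05-slave-selection.tcl scenario along with the rest.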