author    Salvatore Sanfilippo <antirez@gmail.com>   2018-07-03 12:36:53 +0200
committer GitHub <noreply@github.com>                2018-07-03 12:36:53 +0200
commit    02e385169eb9a0c5710062211e6916d7f8aa38c0 (patch)
tree      9dd72ca78dd7819d829edce2a5b5db5ef5b33662
parent    2edcafb35d3cc2dcc516ba68bb173f7765424eea (diff)
parent    2e167f7d0e9abb5c49f8f0fb22a3fe2340ad0e31 (diff)
download  redis-02e385169eb9a0c5710062211e6916d7f8aa38c0.tar.gz
Merge pull request #5081 from trevor211/fixClusterFailover
cluster failover bug
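
Before this fix, replicationSetMaster() cleared server.repl_down_since as
soon as a node was pointed at a new master (via SLAVEOF or a cluster
reconfiguration). If that master then failed before the initial sync
completed, the slave's link-down age, which the cluster failover logic
derives from repl_down_since, looked enormous, so the slave could never
pass the data-age validity check and no failover happened. Moving the
reset into replicationCreateMasterClient() defers it until the replication
link is actually established. (See the sketch after the replication.c
hunks below.)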
-rw-r--r--  src/replication.c                          |  2 +-
-rw-r--r--  tests/cluster/tests/05-slave-selection.tcl | 77 ++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/src/replication.c b/src/replication.c
index 00c2a7199..d5b5dc23c 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -1087,6 +1087,7 @@ void replicationCreateMasterClient(int fd, int dbid) {
if (server.master->reploff == -1)
server.master->flags |= CLIENT_PRE_PSYNC;
if (dbid != -1) selectDb(server.master,dbid);
+ server.repl_down_since = 0;
}

void restartAOF() {
@@ -1950,7 +1951,6 @@ void replicationSetMaster(char *ip, int port) {
* our own parameters, to later PSYNC with the new master. */
if (was_master) replicationCacheMasterUsingMyself();
server.repl_state = REPL_STATE_CONNECT;
- server.repl_down_since = 0;
}

/* Cancel replication, setting the instance as a master itself. */
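
server.repl_down_since, moved by the two hunks above, feeds the data-age
check that decides whether a slave may start a failover election at all.
A condensed C sketch of that check, paraphrased from
clusterHandleSlaveFailover() in cluster.c of this era (the field names are
real; unrelated checks are elided, so this is not verbatim):

mstime_t data_age;

if (server.repl_state == REPL_STATE_CONNECTED) {
    /* Link is up: age is the time since we last heard from the master. */
    data_age = (mstime_t)(server.unixtime - server.master->lastinteraction)
               * 1000;
} else {
    /* Link is down: age is the time since the link went down. Before this
     * fix, repl_down_since could be 0 here (reset at SLAVEOF time while
     * the sync never completed), so data_age became roughly the whole
     * Unix epoch and the slave was disqualified forever. */
    data_age = (mstime_t)(server.unixtime - server.repl_down_since) * 1000;
}

/* A slave whose data looks too old refuses to request votes. */
if (server.cluster_slave_validity_factor &&
    data_age > (((mstime_t)server.repl_ping_slave_period * 1000) +
                (server.cluster_node_timeout *
                 server.cluster_slave_validity_factor)))
    return; /* Too stale to fail over. */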
diff --git a/tests/cluster/tests/05-slave-selection.tcl b/tests/cluster/tests/05-slave-selection.tcl
index 6efedce5d..bcb0fa1ea 100644
--- a/tests/cluster/tests/05-slave-selection.tcl
+++ b/tests/cluster/tests/05-slave-selection.tcl
@@ -92,3 +92,80 @@ test "Node #10 should eventually replicate node #5" {
        fail "#10 didn't become slave of #5"
}
}
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 3 masters and 15 slaves, so that we have 5
+# slaves for each master.
+test "Create a 3 nodes cluster" {
+ create_cluster 3 15
+}
+
+test "Cluster is up" {
+ assert_cluster_state ok
+}
+
+test "The first master has actually 5 slaves" {
+ assert {[llength [lindex [R 0 role] 2]] == 5}
+}
+
+test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
+ set port0 [get_instance_attrib redis 0 port]
+ assert {[lindex [R 3 role] 2] == $port0}
+ assert {[lindex [R 6 role] 2] == $port0}
+ assert {[lindex [R 9 role] 2] == $port0}
+ assert {[lindex [R 12 role] 2] == $port0}
+ assert {[lindex [R 15 role] 2] == $port0}
+}
+
+test {Instance #3, #6, #9, #12 and #15 synced with the master} {
+ wait_for_condition 1000 50 {
+ [RI 3 master_link_status] eq {up} &&
+ [RI 6 master_link_status] eq {up} &&
+ [RI 9 master_link_status] eq {up} &&
+ [RI 12 master_link_status] eq {up} &&
+ [RI 15 master_link_status] eq {up}
+ } else {
+ fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
+ }
+}
+
+proc master_detected {instances} {
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ return true
+ }
+ }
+
+ return false
+}
+
+test "New Master down consecutively" {
+ set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
+
+ set loops [expr {[dict size $instances]-1}]
+ for {set i 0} {$i < $loops} {incr i} {
+ set master_id -1
+ foreach instance [dict keys $instances] {
+ if {[RI $instance role] eq {master}} {
+ set master_id $instance
+                break
+ }
+ }
+
+        if {$master_id == -1} {
+ fail "no master detected, #loop $i"
+ }
+
+ set instances [dict remove $instances $master_id]
+
+ kill_instance redis $master_id
+ wait_for_condition 1000 50 {
+ [master_detected $instances]
+ } else {
+            fail "No failover detected when master $master_id fails"
+ }
+
+ assert_cluster_state ok
+ }
+}
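
This suite lives under tests/cluster; with the stock harness it is
normally run from the repository root with ./runtest-cluster (a thin
wrapper around tests/cluster/run.tcl), which picks up this
05-slave-selection.tcl scenario along with the rest.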