8 files changed, 342 insertions, 0 deletions
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
new file mode 100644
index 000000000..78d2f61bc
--- /dev/null
+++ b/tests/sentinel/run.tcl
@@ -0,0 +1,24 @@
+# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/sentinel
+source ../instances.tcl
+
+# Entry point: parse options, spawn the Sentinel and Redis instances,
+# run the test units, then clean up.
+proc main {} {
+    parse_options
+    spawn_instance sentinel $::sentinel_base_port $::instances_count
+    spawn_instance redis $::redis_base_port $::instances_count
+    run_tests
+    cleanup
+}
+
+# If main raises an error, print the stack trace, still run cleanup, and
+# exit with a non-zero status so callers (scripts, CI) can detect the failure.
+if {[catch main e]} {
+    puts $::errorInfo
+    cleanup
+    exit 1
+}
diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
new file mode 100644
index 000000000..a79d0c371
--- /dev/null
+++ b/tests/sentinel/tests/00-base.tcl
@@ -0,0 +1,126 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+if {$::simulate_error} {
+    test "This test will fail" {
+        fail "Simulated error"
+    }
+}
+
+test "Basic failover works if the master is down" {
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    kill_instance redis $master_id
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not received failover info"
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+    foreach_redis_id id {
+        if {$id != $master_id && $id != 0} {
+            wait_for_condition 1000 50 {
+                [RI $id master_port] == [lindex $addr 1]
+            } else {
+                fail "Redis ID $id not configured to replicate with new master"
+            }
+        }
+    }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+    wait_for_condition 1000 50 {
+        [RI 0 master_port] == [lindex $addr 1]
+    } else {
+        fail "Old master not reconfigured as slave of new master"
+    }
+}
+
+test "ODOWN is not possible without N (quorum) Sentinels reports" {
+    # Require one more agreeing Sentinel than the value held in $sentinels.
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum [expr {$sentinels+1}]
+    }
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    kill_instance redis $master_id
+    # Make sure failover did not happen.
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    restart_instance redis $master_id
+}
+
+test "Failover is not possible without majority agreement" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $quorum
+    }
+
+    # Crash a majority of the Sentinels (IDs 0 to quorum-1).
+    for {set id 0} {$id < $quorum} {incr id} {
+        kill_instance sentinel $id
+    }
+
+    # Kill the current master.
+    kill_instance redis $master_id
+
+    # Make sure failover did not happen (ask Sentinel $quorum, not killed above).
+    set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+    restart_instance redis $master_id
+
+    # Cleanup: restart the killed Sentinels so they monitor the master again.
+    for {set id 0} {$id < $quorum} {incr id} {
+        restart_instance sentinel $id
+    }
+}
+
+test "Failover works if we configure for absolute agreement" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $sentinels
+    }
+
+    # Wait for Sentinels to monitor the master again
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] info-refresh] < 100000
+        } else {
+            fail "At least one Sentinel is not monitoring the master"
+        }
+    }
+
+    kill_instance redis $master_id
+
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+        } else {
+            fail "At least one Sentinel did not received failover info"
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+
+    # Set the min ODOWN agreement back to strict majority.
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster quorum $quorum
+    }
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/01-conf-update.tcl b/tests/sentinel/tests/01-conf-update.tcl
new file mode 100644
index 000000000..4998104d2
--- /dev/null
+++ b/tests/sentinel/tests/01-conf-update.tcl
@@ -0,0 +1,39 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+
+    # Crash Sentinel 1
+    kill_instance sentinel 1
+
+    kill_instance redis $master_id
+    foreach_sentinel_id id {
+        if {$id != 1} {
+            wait_for_condition 1000 50 {
+                [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+            } else {
+                fail "Sentinel $id did not received failover info"
+            }
+        }
+    }
+    restart_instance redis $master_id
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+    restart_instance sentinel 1
+    wait_for_condition 1000 50 {
+        [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+    } else {
+        fail "Restarted Sentinel did not received failover info"
+    }
+}
+
+test "New master [join $addr {:}] role matches" {
+    assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/02-slaves-reconf.tcl b/tests/sentinel/tests/02-slaves-reconf.tcl
new file mode 100644
index 000000000..868bae5ec
--- /dev/null
+++ b/tests/sentinel/tests/02-slaves-reconf.tcl
@@ -0,0 +1,83 @@
+# Check that slaves are reconfigured at a later time if they are partitioned.
+#
+# Here we should test:
+# 1) That slaves point to the new master after failover.
+# 2) That partitioned slaves point to new master when they are partitioned
+#    away during failover and return at a later time.
+
+source "../tests/includes/init-tests.tcl"
+
+proc 03_test_slaves_replication {} {
+    uplevel 1 {
+        test "Check that slaves replicate from current master" {
+            set master_port [RI $master_id tcp_port]
+            foreach_redis_id id {
+                if {$id == $master_id} continue
+                if {[instance_is_killed redis $id]} continue
+                wait_for_condition 1000 50 {
+                    [RI $id master_port] == $master_port
+                } else {
+                    fail "Redis slave $id is replicating from wrong master"
+                }
+            }
+        }
+    }
+}
+
+proc 03_crash_and_failover {} {
+    uplevel 1 {
+        test "Crash the master and force a failover" {
+            set old_port [RI $master_id tcp_port]
+            set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+            assert {[lindex $addr 1] == $old_port}
+            kill_instance redis $master_id
+            foreach_sentinel_id id {
+                wait_for_condition 1000 50 {
+                    [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+                } else {
+                    fail "At least one Sentinel did not received failover info"
+                }
+            }
+            restart_instance redis $master_id
+            set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+            set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+        }
+    }
+}
+
+03_test_slaves_replication
+03_crash_and_failover
+03_test_slaves_replication
+
+test "Kill a slave instance" {
+    foreach_redis_id id {
+        if {$id == $master_id} continue
+        set killed_slave_id $id
+        kill_instance redis $id
+        break
+    }
+}
+
+03_crash_and_failover
+03_test_slaves_replication
+
+test "Wait for failover to end" {
+    # Poll each Sentinel until its SENTINEL MASTER reply no longer carries a
+    # failover-state field. Uses wait_for_condition with the same 1000/50
+    # bound as the other tests, so a stuck failover fails instead of hanging.
+    foreach_sentinel_id id {
+        wait_for_condition 1000 50 {
+            ![dict exists [S $id SENTINEL MASTER mymaster] failover-state]
+        } else {
+            fail "Sentinel $id still reports a failover in progress"
+        }
+    }
+}
+
+test "Restart killed slave and test replication of slaves again..." {
+    restart_instance redis $killed_slave_id
+}
+
+# Now we check if the slave rejoining the partition is reconfigured even
+# if the failover finished.
+03_test_slaves_replication
diff --git a/tests/sentinel/tests/03-runtime-reconf.tcl b/tests/sentinel/tests/03-runtime-reconf.tcl
new file mode 100644
index 000000000..426596c37
--- /dev/null
+++ b/tests/sentinel/tests/03-runtime-reconf.tcl
@@ -0,0 +1 @@
+# Test runtime reconfiguration command SENTINEL SET.
diff --git a/tests/sentinel/tests/04-slave-selection.tcl b/tests/sentinel/tests/04-slave-selection.tcl
new file mode 100644
index 000000000..3d2ca6484
--- /dev/null
+++ b/tests/sentinel/tests/04-slave-selection.tcl
@@ -0,0 +1,5 @@
+# Test slave selection algorithm.
+#
+# This unit should test:
+# 1) That when there are no suitable slaves no failover is performed.
+# 2) That among the available slaves, the one with better offset is picked.
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
new file mode 100644
index 000000000..cb359ea1b
--- /dev/null
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -0,0 +1,67 @@
+# Initialization tests -- most units will start including this.
+
+test "(init) Restart killed instances" {
+    foreach type {redis sentinel} {
+        foreach_${type}_id id {
+            if {[get_instance_attrib $type $id pid] == -1} {
+                puts -nonewline "$type/$id "
+                flush stdout
+                restart_instance $type $id
+            }
+        }
+    }
+}
+
+set redis_slaves 4
+test "(init) Create a master-slaves cluster of [expr {$redis_slaves+1}] instances" {
+    create_redis_master_slave_cluster [expr {$redis_slaves+1}]
+}
+set master_id 0
+
+test "(init) Sentinels can start monitoring a master" {
+    set sentinels [llength $::sentinel_instances]
+    set quorum [expr {$sentinels/2+1}]
+    foreach_sentinel_id id {
+        catch {S $id SENTINEL REMOVE mymaster}
+        S $id SENTINEL MONITOR mymaster \
+              [get_instance_attrib redis $master_id host] \
+              [get_instance_attrib redis $master_id port] $quorum
+    }
+    foreach_sentinel_id id {
+        assert {[S $id sentinel master mymaster] ne {}}
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
+        S $id SENTINEL SET mymaster failover-timeout 20000
+        S $id SENTINEL SET mymaster parallel-syncs 10
+    }
+}
+
+test "(init) Sentinels can talk with the master" {
+    foreach_sentinel_id id {
+        wait_for_condition 100 50 {
+            [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+        } else {
+            fail "Sentinel $id can't talk with the master."
+        }
+    }
+}
+
+test "(init) Sentinels are able to auto-discover other sentinels" {
+    set sentinels [llength $::sentinel_instances]
+    foreach_sentinel_id id {
+        wait_for_condition 100 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == ($sentinels-1)
+        } else {
+            fail "At least some sentinel can't detect some other sentinel"
+        }
+    }
+}
+
+test "(init) Sentinels are able to auto-discover slaves" {
+    foreach_sentinel_id id {
+        wait_for_condition 100 50 {
+            [dict get [S $id SENTINEL MASTER mymaster] num-slaves] == $redis_slaves
+        } else {
+            fail "At least some sentinel can't detect some slave"
+        }
+    }
+}
diff --git a/tests/sentinel/tmp/.gitignore b/tests/sentinel/tmp/.gitignore
new file mode 100644
index 000000000..f581f73e2
--- /dev/null
+++ b/tests/sentinel/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*