summaryrefslogtreecommitdiff
path: root/tests/sentinel
diff options
context:
space:
mode:
Diffstat (limited to 'tests/sentinel')
-rw-r--r--tests/sentinel/run.tcl19
-rw-r--r--tests/sentinel/tests/00-base.tcl126
-rw-r--r--tests/sentinel/tests/01-conf-update.tcl39
-rw-r--r--tests/sentinel/tests/02-slaves-reconf.tcl83
-rw-r--r--tests/sentinel/tests/03-runtime-reconf.tcl1
-rw-r--r--tests/sentinel/tests/04-slave-selection.tcl5
-rw-r--r--tests/sentinel/tests/includes/init-tests.tcl67
-rw-r--r--tests/sentinel/tmp/.gitignore2
8 files changed, 342 insertions, 0 deletions
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
new file mode 100644
index 000000000..78d2f61bc
--- /dev/null
+++ b/tests/sentinel/run.tcl
@@ -0,0 +1,19 @@
+# Sentinel test suite. Copyright (C) 2014 Salvatore Sanfilippo antirez@gmail.com
+# This software is released under the BSD License. See the COPYING file for
+# more information.
+
+cd tests/sentinel
+source ../instances.tcl
+
+# Entry point of the test run. In order: calls parse_options, spawns the
+# sentinel instances and then the redis instances (each call receives the
+# matching base port global and $::instances_count), runs the tests, and
+# finally calls cleanup.
+# NOTE(review): parse_options, spawn_instance, run_tests and cleanup are not
+# defined in this file; presumably they come from ../instances.tcl, which is
+# sourced above -- confirm.
+proc main {} {
+ parse_options
+ spawn_instance sentinel $::sentinel_base_port $::instances_count
+ spawn_instance redis $::redis_base_port $::instances_count
+ run_tests
+ cleanup
+}
+
+# Run the suite. If main raises an error, print the Tcl stack trace, still
+# run cleanup, and then exit with a non-zero status: without the explicit
+# exit the script would simply reach its end and report success to the
+# caller (shell script, CI job) even though the run failed.
+if {[catch main e]} {
+ puts $::errorInfo
+ cleanup
+ exit 1
+}
diff --git a/tests/sentinel/tests/00-base.tcl b/tests/sentinel/tests/00-base.tcl
new file mode 100644
index 000000000..a79d0c371
--- /dev/null
+++ b/tests/sentinel/tests/00-base.tcl
@@ -0,0 +1,126 @@
+# Check the basic monitoring and failover capabilities.
+
+source "../tests/includes/init-tests.tcl"
+
+if {$::simulate_error} {
+ test "This test will fail" {
+ fail "Simulated error"
+ }
+}
+
+test "Basic failover works if the master is down" {
+ set old_port [RI $master_id tcp_port]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not received failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
+
+test "All the other slaves now point to the new master" {
+ foreach_redis_id id {
+ if {$id != $master_id && $id != 0} {
+ wait_for_condition 1000 50 {
+ [RI $id master_port] == [lindex $addr 1]
+ } else {
+ fail "Redis ID $id not configured to replicate with new master"
+ }
+ }
+ }
+}
+
+test "The old master eventually gets reconfigured as a slave" {
+ wait_for_condition 1000 50 {
+ [RI 0 master_port] == [lindex $addr 1]
+ } else {
+ fail "Old master not reconfigured as slave of new master"
+ }
+}
+
+test "ODOWN is not possible without N (quorum) Sentinels reports" {
+ # Ask for one more agreeing Sentinel than the number of Sentinels we
+ # have, so that the requested quorum can never be reached.
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum [expr {$sentinels+1}]
+ }
+ set old_port [RI $master_id tcp_port]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ kill_instance redis $master_id
+
+ # Give the Sentinels time to notice the crash before checking. The init
+ # tests configure down-after-milliseconds to 2000, so an assertion made
+ # right after the kill would pass even if a failover were about to start.
+ after 5000
+
+ # Make sure failover did not happen.
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ restart_instance redis $master_id
+}
+
+test "Failover is not possible without majority agreement" {
+ # Set the quorum to $quorum (computed by the init tests) on every Sentinel.
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $quorum
+ }
+
+ # Crash majority of sentinels
+ for {set id 0} {$id < $quorum} {incr id} {
+ kill_instance sentinel $id
+ }
+
+ # Kill the current master
+ kill_instance redis $master_id
+
+ # Give the surviving Sentinels time to notice the crash before checking.
+ # The init tests configure down-after-milliseconds to 2000, so an
+ # assertion made right after the kill could never detect a failover.
+ after 5000
+
+ # Make sure failover did not happen. Sentinel $quorum is queried because
+ # Sentinels 0 .. $quorum-1 were killed above. $old_port is not set in
+ # this test: it relies on the value left by the preceding test.
+ set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ restart_instance redis $master_id
+
+ # Cleanup: restart Sentinels to monitor the master.
+ for {set id 0} {$id < $quorum} {incr id} {
+ restart_instance sentinel $id
+ }
+}
+
+test "Failover works if we configure for absolute agreement" {
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $sentinels
+ }
+
+ # Wait for Sentinels to monitor the master again
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [dict get [S $id SENTINEL MASTER mymaster] info-refresh] < 100000
+ } else {
+ fail "At least one Sentinel is not monitoring the master"
+ }
+ }
+
+ kill_instance redis $master_id
+
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not received failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+
+ # Set the min ODOWN agreement back to strict majority.
+ foreach_sentinel_id id {
+ S $id SENTINEL SET mymaster quorum $quorum
+ }
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/01-conf-update.tcl b/tests/sentinel/tests/01-conf-update.tcl
new file mode 100644
index 000000000..4998104d2
--- /dev/null
+++ b/tests/sentinel/tests/01-conf-update.tcl
@@ -0,0 +1,39 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+ set old_port [RI $master_id tcp_port]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+
+ # Crash Sentinel 1
+ kill_instance sentinel 1
+
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ if {$id != 1} {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "Sentinel $id did not received failover info"
+ }
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+ restart_instance sentinel 1
+ wait_for_condition 1000 50 {
+ [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "Restarted Sentinel did not received failover info"
+ }
+}
+
+test "New master [join $addr {:}] role matches" {
+ assert {[RI $master_id role] eq {master}}
+}
diff --git a/tests/sentinel/tests/02-slaves-reconf.tcl b/tests/sentinel/tests/02-slaves-reconf.tcl
new file mode 100644
index 000000000..868bae5ec
--- /dev/null
+++ b/tests/sentinel/tests/02-slaves-reconf.tcl
@@ -0,0 +1,83 @@
+# Check that slaves are reconfigured at a later time if they are partitioned.
+#
+# Here we should test:
+# 1) That slaves point to the new master after failover.
+# 2) That partitioned slaves point to new master when they are partitioned
+# away during failover and return at a later time.
+
+source "../tests/includes/init-tests.tcl"
+
+# Runs, in the caller's scope (uplevel 1), a test checking that every redis
+# instance other than $master_id, and for which instance_is_killed is false,
+# reports a master_port equal to the tcp_port of the current master.
+# Reads master_id from the caller's scope and sets master_port and id there.
+# NOTE(review): the "1000 50" arguments of wait_for_condition are presumably
+# the number of retries and the delay in milliseconds -- confirm against the
+# helper's definition.
+proc 03_test_slaves_replication {} {
+ uplevel 1 {
+ test "Check that slaves replicate from current master" {
+ set master_port [RI $master_id tcp_port]
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+ if {[instance_is_killed redis $id]} continue
+ wait_for_condition 1000 50 {
+ [RI $id master_port] == $master_port
+ } else {
+ fail "Redis slave $id is replicating from wrong master"
+ }
+ }
+ }
+ }
+}
+
+# Runs, in the caller's scope (uplevel 1), a test that kills the current
+# master, waits until every Sentinel reports a master port different from
+# the old one, restarts the killed instance, and then updates addr and
+# master_id from what Sentinel 0 reports as the master address.
+# Reads and rewrites master_id in the caller's scope; also sets old_port,
+# addr and id there.
+proc 03_crash_and_failover {} {
+ uplevel 1 {
+ test "Crash the master and force a failover" {
+ set old_port [RI $master_id tcp_port]
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ assert {[lindex $addr 1] == $old_port}
+ kill_instance redis $master_id
+ foreach_sentinel_id id {
+ wait_for_condition 1000 50 {
+ [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+ } else {
+ fail "At least one Sentinel did not received failover info"
+ }
+ }
+ restart_instance redis $master_id
+ set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+ set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+ }
+ }
+}
+
+03_test_slaves_replication
+03_crash_and_failover
+03_test_slaves_replication
+
+test "Kill a slave instance" {
+ foreach_redis_id id {
+ if {$id == $master_id} continue
+ set killed_slave_id $id
+ kill_instance redis $id
+ break
+ }
+}
+
+03_crash_and_failover
+03_test_slaves_replication
+
+test "Wait for failover to end" {
+ # Poll every 100 ms until no Sentinel reports a failover-state field for
+ # mymaster. The number of polls is bounded (600 polls, about 60 seconds)
+ # so that a Sentinel stuck in failover makes the test fail instead of
+ # hanging the whole suite forever.
+ set retries 600
+ set inprogress 1
+ while {$inprogress && $retries > 0} {
+ set inprogress 0
+ foreach_sentinel_id id {
+ if {[dict exists [S $id SENTINEL MASTER mymaster] failover-state]} {
+ incr inprogress
+ }
+ }
+ if {$inprogress} {
+ incr retries -1
+ after 100
+ }
+ }
+ if {$inprogress} {
+ fail "At least one Sentinel still reports a failover in progress"
+ }
+}
+
+test "Restart killed slave and test replication of slaves again..." {
+ restart_instance redis $killed_slave_id
+}
+
+# Now we check if the slave rejoining the partition is reconfigured even
+# if the failover finished.
+03_test_slaves_replication
diff --git a/tests/sentinel/tests/03-runtime-reconf.tcl b/tests/sentinel/tests/03-runtime-reconf.tcl
new file mode 100644
index 000000000..426596c37
--- /dev/null
+++ b/tests/sentinel/tests/03-runtime-reconf.tcl
@@ -0,0 +1 @@
+# Test runtime reconfiguration command SENTINEL SET.
diff --git a/tests/sentinel/tests/04-slave-selection.tcl b/tests/sentinel/tests/04-slave-selection.tcl
new file mode 100644
index 000000000..3d2ca6484
--- /dev/null
+++ b/tests/sentinel/tests/04-slave-selection.tcl
@@ -0,0 +1,5 @@
+# Test slave selection algorithm.
+#
+# This unit should test:
+# 1) That when there are no suitable slaves no failover is performed.
+# 2) That among the available slaves, the one with better offset is picked.
diff --git a/tests/sentinel/tests/includes/init-tests.tcl b/tests/sentinel/tests/includes/init-tests.tcl
new file mode 100644
index 000000000..cb359ea1b
--- /dev/null
+++ b/tests/sentinel/tests/includes/init-tests.tcl
@@ -0,0 +1,67 @@
+# Initialization tests -- most units will start including this.
+
+test "(init) Restart killed instances" {
+ foreach type {redis sentinel} {
+ foreach_${type}_id id {
+ if {[get_instance_attrib $type $id pid] == -1} {
+ puts -nonewline "$type/$id "
+ flush stdout
+ restart_instance $type $id
+ }
+ }
+ }
+}
+
+set redis_slaves 4
+# Create the cluster used by the tests: $redis_slaves+1 instances in total.
+# The expr in the test title is braced like the one in the body, so that the
+# expression is parsed only by expr instead of being substituted twice.
+test "(init) Create a master-slaves cluster of [expr {$redis_slaves+1}] instances" {
+ create_redis_master_slave_cluster [expr {$redis_slaves+1}]
+}
+set master_id 0
+
+test "(init) Sentinels can start monitoring a master" {
+ set sentinels [llength $::sentinel_instances]
+ set quorum [expr {$sentinels/2+1}]
+ foreach_sentinel_id id {
+ catch {S $id SENTINEL REMOVE mymaster}
+ S $id SENTINEL MONITOR mymaster \
+ [get_instance_attrib redis $master_id host] \
+ [get_instance_attrib redis $master_id port] $quorum
+ }
+ foreach_sentinel_id id {
+ assert {[S $id sentinel master mymaster] ne {}}
+ S $id SENTINEL SET mymaster down-after-milliseconds 2000
+ S $id SENTINEL SET mymaster failover-timeout 20000
+ S $id SENTINEL SET mymaster parallel-syncs 10
+ }
+}
+
+test "(init) Sentinels can talk with the master" {
+ foreach_sentinel_id id {
+ wait_for_condition 100 50 {
+ [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+ } else {
+ fail "Sentinel $id can't talk with the master."
+ }
+ }
+}
+
+test "(init) Sentinels are able to auto-discover other sentinels" {
+ set sentinels [llength $::sentinel_instances]
+ foreach_sentinel_id id {
+ wait_for_condition 100 50 {
+ [dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == ($sentinels-1)
+ } else {
+ fail "At least some sentinel can't detect some other sentinel"
+ }
+ }
+}
+
+test "(init) Sentinels are able to auto-discover slaves" {
+ foreach_sentinel_id id {
+ wait_for_condition 100 50 {
+ [dict get [S $id SENTINEL MASTER mymaster] num-slaves] == $redis_slaves
+ } else {
+ fail "At least some sentinel can't detect some slave"
+ }
+ }
+}
diff --git a/tests/sentinel/tmp/.gitignore b/tests/sentinel/tmp/.gitignore
new file mode 100644
index 000000000..f581f73e2
--- /dev/null
+++ b/tests/sentinel/tmp/.gitignore
@@ -0,0 +1,2 @@
+redis_*
+sentinel_*