From ec593b582e12ba1fb715b02b0f3ccb8b999ceaa4 Mon Sep 17 00:00:00 2001
From: antirez
Date: Sat, 22 Feb 2014 17:26:30 +0100
Subject: Sentinel test: framework improved and conf-update unit added.

It is now possible to kill and restart sentinel or redis instances for
more real-world testing.

The 01 unit tests the capability of Sentinel to update the configuration
of Sentinels rejoining the cluster, however the test is pretty trivial
and more tests should be added.
---
 tests/sentinel-tests/00-base.tcl             | 17 ++-------
 tests/sentinel-tests/01-conf-update.tcl      | 38 +++++++++++++++++++
 tests/sentinel-tests/includes/init-tests.tcl | 11 +++++-
 tests/sentinel.tcl                           | 56 ++++++++++++++++++++++++++--
 tests/support/test.tcl                       | 10 ++++-
 5 files changed, 112 insertions(+), 20 deletions(-)
 create mode 100644 tests/sentinel-tests/01-conf-update.tcl

diff --git a/tests/sentinel-tests/00-base.tcl b/tests/sentinel-tests/00-base.tcl
index 0587c625c..b8dfa70ca 100644
--- a/tests/sentinel-tests/00-base.tcl
+++ b/tests/sentinel-tests/00-base.tcl
@@ -23,20 +23,11 @@ test "Sentinels are able to auto-discover slaves" {
     }
 }
 
-test "Can change master parameters via SENTINEL SET" {
-    foreach_sentinel_id id {
-        S $id SENTINEL SET mymaster down-after-milliseconds 2000
-    }
-    foreach_sentinel_id id {
-        assert {[dict get [S $id sentinel master mymaster] down-after-milliseconds] == 2000}
-    }
-}
-
 test "Basic failover works if the master is down" {
     set old_port [RI $master_id tcp_port]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10
     foreach_sentinel_id id {
         wait_for_condition 100 50 {
             [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
@@ -79,7 +70,7 @@ test "ODOWN is not possible without enough Sentinels reports" {
     set old_port [RI $master_id tcp_port]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10
 
     # Make sure failover did not happened.
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
@@ -95,7 +86,7 @@ test "Failover is not possible without majority agreement" {
     for {set id 0} {$id < $quorum} {incr id} {
         S $id SENTINEL REMOVE mymaster
     }
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10
 
     # Make sure failover did not happened.
     set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
@@ -124,7 +115,7 @@ test "Failover works if we configure for absolute agreement" {
         }
     }
 
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10
     foreach_sentinel_id id {
         wait_for_condition 1000 50 {
             [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
diff --git a/tests/sentinel-tests/01-conf-update.tcl b/tests/sentinel-tests/01-conf-update.tcl
new file mode 100644
index 000000000..493237075
--- /dev/null
+++ b/tests/sentinel-tests/01-conf-update.tcl
@@ -0,0 +1,38 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../sentinel-tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
+    }
+
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+
+    # Crash Sentinel 1 so that it is down during the failover triggered below.
+    kill_instance sentinel 1
+
+    R $master_id debug sleep 10
+    foreach_sentinel_id id {
+        if {$id != 1} {
+            wait_for_condition 1000 50 {
+                [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+            } else {
+                fail "Sentinel $id did not received failover info"
+            }
+        }
+    }
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]]
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+    restart_instance sentinel 1
+    wait_for_condition 1000 50 {
+        [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+    } else {
+        fail "Restarted Sentinel did not received failover info"
+    }
+}
diff --git a/tests/sentinel-tests/includes/init-tests.tcl b/tests/sentinel-tests/includes/init-tests.tcl
index 302f64b65..82beeea4f 100644
--- a/tests/sentinel-tests/includes/init-tests.tcl
+++ b/tests/sentinel-tests/includes/init-tests.tcl
@@ -17,7 +17,16 @@ test "Sentinels can start monitoring a master" {
     }
     foreach_sentinel_id id {
         assert {[S $id sentinel master mymaster] ne {}}
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
     }
 }
 
-
+test "Sentinels can talk with the master" {
+    foreach_sentinel_id id {
+        wait_for_condition 100 50 {
+            [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+        } else {
+            fail "Sentinel $id can't talk with the master."
+        }
+    }
+}
diff --git a/tests/sentinel.tcl b/tests/sentinel.tcl
index 4b49ed4c7..f1c1669ac 100644
--- a/tests/sentinel.tcl
+++ b/tests/sentinel.tcl
@@ -53,8 +53,8 @@ proc spawn_instance {type base_port count} {
         } else {
             set prgname redis-sentinel
         }
-        set sentinel_pid [exec ../../src/${prgname} $cfgfile &]
-        lappend ::pids $sentinel_pid
+        set pid [exec ../../src/${prgname} $cfgfile &]
+        lappend ::pids $pid
 
         # Check availability
         if {[server_is_up 127.0.0.1 $port 100] == 0} {
@@ -63,7 +63,7 @@ proc spawn_instance {type base_port count} {
 
         # Push the instance into the right list
         lappend ::${type}_instances [list \
-            pid $sentinel_pid \
+            pid $pid \
             host 127.0.0.1 \
             port $port \
             link [redis 127.0.0.1 $port] \
@@ -212,6 +212,13 @@ proc get_instance_attrib {type id attrib} {
     dict get [lindex [set ::${type}_instances] $id] $attrib
 }
 
+# Set attribute 'attrib' to 'newval' for instance 'id' in ::<type>_instances.
+proc set_instance_attrib {type id attrib newval} {
+    set d [lindex [set ::${type}_instances] $id]
+    dict set d $attrib $newval
+    lset ::${type}_instances $id $d
+}
+
 # Create a master-slave cluster of the given number of total instances.
 # The first instance "0" is the master, all others are configured as
 # slaves.
@@ -219,8 +226,8 @@ proc create_redis_master_slave_cluster n {
     foreach_redis_id id {
         if {$id == 0} {
             # Our master.
-            R $id flushall
             R $id slaveof no one
+            R $id flushall
         } elseif {$id < $n} {
             R $id slaveof [get_instance_attrib redis 0 host] \
                           [get_instance_attrib redis 0 port]
@@ -246,6 +253,47 @@ proc get_instance_id_by_port {type port} {
     fail "Instance $type port $port not found."
 }
 
+# Kill an instance of the specified type/id with SIGKILL.
+# This function will mark the instance PID as -1 to remember that this instance
+# is no longer running and will remove its PID from the list of pids that
+# we kill at cleanup.
+#
+# The instance can be restarted with restart_instance.
+proc kill_instance {type id} {
+    set pid [get_instance_attrib $type $id pid]
+    exec kill -9 $pid
+    set_instance_attrib $type $id pid -1
+    set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
+
+    # Remove the PID from the list of pids to kill at exit.
+    set ::pids [lsearch -all -inline -not -exact $::pids $pid]
+}
+
+# Restart an instance previously killed by kill_instance
+proc restart_instance {type id} {
+    set dirname "${type}_${id}"
+    set cfgfile [file join $dirname $type.conf]
+    set port [get_instance_attrib $type $id port]
+
+    # Run it with its old setup; record the new pid (replacing the -1 marker).
+    if {$type eq "redis"} {
+        set prgname redis-server
+    } else {
+        set prgname redis-sentinel
+    }
+    set pid [exec ../../src/${prgname} $cfgfile &]
+    set_instance_attrib $type $id pid $pid
+    lappend ::pids $pid
+
+    # Check that the instance is running
+    if {[server_is_up 127.0.0.1 $port 100] == 0} {
+        abort_sentinel_test "Problems starting $type #$id: ping timeout"
+    }
+
+    # Connect with it with a fresh link
+    set_instance_attrib $type $id link [redis 127.0.0.1 $port]
+}
+
 if {[catch main e]} {
     puts $::errorInfo
     cleanup
diff --git a/tests/support/test.tcl b/tests/support/test.tcl
index 96b529d7a..bf2cb0e2f 100644
--- a/tests/support/test.tcl
+++ b/tests/support/test.tcl
@@ -53,11 +53,17 @@ proc assert_type {type key} {
 # executed.
 proc wait_for_condition {maxtries delay e _else_ elsescript} {
     while {[incr maxtries -1] >= 0} {
-        if {[uplevel 1 [list expr $e]]} break
+        set errcode [catch {uplevel 1 [list expr $e]} result]
+        if {$errcode == 0} {
+            if {$result} break
+        } else {
+            return -code $errcode $result
+        }
         after $delay
     }
     if {$maxtries == -1} {
-        uplevel 1 $elsescript
+        set errcode [catch {uplevel 1 $elsescript} result]
+        return -code $errcode $result
     }
 }
 
-- 
cgit v1.2.1