summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Oran Agra <oran@redislabs.com> 2019-05-05 08:19:52 +0300
committer Oran Agra <oran@redislabs.com> 2019-05-05 08:25:01 +0300
commit ba809f26d4bd81d23fa929d0c018f235ab298564 (patch)
tree 567a60afcbd58f6d06be7465aa7a25906a73663f
parent 0a6090bfd8fbec26f682ff0a1dc7a43699e0c9b7 (diff)
download redis-ba809f26d4bd81d23fa929d0c018f235ab298564.tar.gz
make replication tests more stable on slow machines
Solve a few race conditions in replication-related tests that fail on slow machines. Bugfix in the slave buffers test: since the test is executed twice, each time with a different command count, the threshold for the delta can't be a constant.
-rw-r--r-- tests/integration/psync2.tcl 5
-rw-r--r-- tests/integration/replication-psync.tcl 26
-rw-r--r-- tests/unit/maxmemory.tcl 7
3 files changed, 34 insertions, 4 deletions
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
index 8663d6fcc..d1212b640 100644
--- a/tests/integration/psync2.tcl
+++ b/tests/integration/psync2.tcl
@@ -166,12 +166,15 @@ start_server {} {
# Pick a random slave
set slave_id [expr {($master_id+1)%5}]
set sync_count [status $R($master_id) sync_full]
+ set sync_partial [status $R($master_id) sync_partial_ok]
catch {
$R($slave_id) config rewrite
$R($slave_id) debug restart
}
+ # note: just waiting for connected_slaves==4 has a race condition, since
+ # we might do the check before the master realizes that the slave has disconnected
wait_for_condition 50 1000 {
- [status $R($master_id) connected_slaves] == 4
+ [status $R($master_id) sync_partial_ok] == $sync_partial + 1
} else {
fail "Replica not reconnecting"
}
diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl
index a3bce2a4c..bf8682446 100644
--- a/tests/integration/replication-psync.tcl
+++ b/tests/integration/replication-psync.tcl
@@ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec
stop_bg_complex_data $load_handle0
stop_bg_complex_data $load_handle1
stop_bg_complex_data $load_handle2
+
+ # Wait for the slave to reach the "online"
+ # state from the POV of the master.
+ set retry 5000
+ while {$retry} {
+ set info [$master info]
+ if {[string match {*slave0:*state=online*} $info]} {
+ break
+ } else {
+ incr retry -1
+ after 100
+ }
+ }
+ if {$retry == 0} {
+ error "assertion:Slave not correctly synchronized"
+ }
+
+ # Wait for the slave to acknowledge that it is online, so
+ # we are sure that DBSIZE and DEBUG DIGEST will not
+ # fail because of timing issues (-LOADING error).
+ wait_for_condition 5000 100 {
+ [lindex [$slave role] 3] eq {connected}
+ } else {
+ fail "Slave still not connected after some time"
+ }
+
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl
index 1def57af5..0f64ddc18 100644
--- a/tests/unit/maxmemory.tcl
+++ b/tests/unit/maxmemory.tcl
@@ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
}
# make sure master doesn't disconnect slave because of timeout
- $master config set repl-timeout 300 ;# 5 minutes
+ $master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines)
$master config set maxmemory-policy allkeys-random
$master config set client-output-buffer-limit "replica 100000000 100000000 300"
$master config set repl-backlog-size [expr {10*1024}]
@@ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
assert {[$master dbsize] == 100}
assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers
- assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
+ set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
+ assert {$delta < $delta_max && $delta > -$delta_max}
$master client kill type slave
set killed_used [s -1 used_memory]
@@ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline}
set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}]
set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}]
assert {$killed_slave_buf == 0}
- assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB
+ assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max}
}
# unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server