summaryrefslogtreecommitdiff
path: root/tests/cluster
diff options
context:
space:
mode:
authorOran Agra <oran@redislabs.com>2020-09-07 18:06:25 +0300
committerGitHub <noreply@github.com>2020-09-07 18:06:25 +0300
commitb491d477c3062d1fb064a794d74643c82d1c4adc (patch)
tree71a2c0e4a7630ebba6ea7f96d92dc90a9f4124d8 /tests/cluster
parent2df4cb93acabf10bb0ff39c12030791b0947e719 (diff)
downloadredis-b491d477c3062d1fb064a794d74643c82d1c4adc.tar.gz
Fix cluster consistency-check test (#7754)
This test was failing from time to time see discussion at the bottom of #7635 This was probably due to timing, the DEBUG SLEEP executed by redis-cli didn't sleep for enough time. This commit changes: 1) use SET-ACTIVE-EXPIRE instead of DEBUG SLEEP 2) reduce many `after` sleeps with retry loops to speed up the test. 3) add many comment explaining the different steps of the test and it's purpose. 4) config appendonly before populating the volatile keys, so that they'll be part of the AOF command stream rather than the preamble RDB portion. other complications: recently kill_instance switched from SIGKILL to SIGTERM, and this would sometimes fail since there was an AOFRW running in the background. now we wait for it to end before attempting the kill.
Diffstat (limited to 'tests/cluster')
-rw-r--r--tests/cluster/tests/14-consistency-check.tcl84
1 files changed, 55 insertions, 29 deletions
diff --git a/tests/cluster/tests/14-consistency-check.tcl b/tests/cluster/tests/14-consistency-check.tcl
index 5a80dd0df..ddc0570e6 100644
--- a/tests/cluster/tests/14-consistency-check.tcl
+++ b/tests/cluster/tests/14-consistency-check.tcl
@@ -39,53 +39,79 @@ proc cluster_write_keys_with_expire {id ttl} {
$cluster close
}
+# make sure that replica who restarts from persistence will load keys
+# that have already expired, critical for correct execution of commands
+# that arrive from the master
proc test_slave_load_expired_keys {aof} {
test "Slave expired keys is loaded when restarted: appendonly=$aof" {
set master_id [find_non_empty_master]
set replica_id [get_one_of_my_replica $master_id]
- set master_dbsize [R $master_id dbsize]
- set slave_dbsize [R $replica_id dbsize]
- assert_equal $master_dbsize $slave_dbsize
-
- set data_ttl 5
- cluster_write_keys_with_expire $master_id $data_ttl
- after 100
- set replica_dbsize_1 [R $replica_id dbsize]
- assert {$replica_dbsize_1 > $slave_dbsize}
+ set master_dbsize_0 [R $master_id dbsize]
+ set replica_dbsize_0 [R $replica_id dbsize]
+ assert_equal $master_dbsize_0 $replica_dbsize_0
+ # config the replica persistency and rewrite the config file to survive restart
+ # note that this needs to be done before populating the volatile keys since
+ # that triggers and AOFRW, and we rather the AOF file to have SETEX commands
+ # rather than an RDB with volatile keys
R $replica_id config set appendonly $aof
R $replica_id config rewrite
- set start_time [clock seconds]
- set end_time [expr $start_time+$data_ttl+2]
- R $replica_id save
- set replica_dbsize_2 [R $replica_id dbsize]
- assert {$replica_dbsize_2 > $slave_dbsize}
+ # fill with 100 keys with 3 second TTL
+ set data_ttl 3
+ cluster_write_keys_with_expire $master_id $data_ttl
+
+ # wait for replica to be in sync with master
+ wait_for_condition 500 10 {
+ [R $replica_id dbsize] eq [R $master_id dbsize]
+ } else {
+ fail "replica didn't sync"
+ }
+
+ set replica_dbsize_1 [R $replica_id dbsize]
+ assert {$replica_dbsize_1 > $replica_dbsize_0}
+
+ # make replica create persistence file
+ if {$aof == "yes"} {
+ # we need to wait for the initial AOFRW to be done, otherwise
+ # kill_instance (which now uses SIGTERM will fail ("Writing initial AOF, can't exit")
+ wait_for_condition 100 10 {
+ [RI $replica_id aof_rewrite_in_progress] eq 0
+ } else {
+ fail "keys didn't expire"
+ }
+ } else {
+ R $replica_id save
+ }
+
+ # kill the replica (would stay down until re-started)
kill_instance redis $replica_id
- set master_port [get_instance_attrib redis $master_id port]
- exec ../../../src/redis-cli \
- -h 127.0.0.1 -p $master_port \
- {*}[rediscli_tls_config "../../../tests"] \
- debug sleep [expr $data_ttl+3] > /dev/null &
+ # Make sure the master doesn't do active expire (sending DELs to the replica)
+ R $master_id DEBUG SET-ACTIVE-EXPIRE 0
- while {[clock seconds] <= $end_time} {
- #wait for $data_ttl seconds
- }
+ # wait for all the keys to get logically expired
+ after [expr $data_ttl*1000]
+
+ # start the replica again (loading an RDB or AOF file)
restart_instance redis $replica_id
- wait_for_condition 200 50 {
- [R $replica_id ping] eq {PONG}
+ # make sure the keys are still there
+ set replica_dbsize_3 [R $replica_id dbsize]
+ assert {$replica_dbsize_3 > $replica_dbsize_0}
+
+ # restore settings
+ R $master_id DEBUG SET-ACTIVE-EXPIRE 1
+
+ # wait for the master to expire all keys and replica to get the DELs
+ wait_for_condition 500 10 {
+ [R $replica_id dbsize] eq $master_dbsize_0
} else {
- fail "replica #$replica_id not started"
+ fail "keys didn't expire"
}
-
- set replica_dbsize_3 [R $replica_id dbsize]
- assert {$replica_dbsize_3 > $slave_dbsize}
}
}
test_slave_load_expired_keys no
-after 5000
test_slave_load_expired_keys yes