diff options
Diffstat (limited to 'tests/integration/replication.tcl')
-rw-r--r-- | tests/integration/replication.tcl | 166 |
1 files changed, 165 insertions, 1 deletions
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 5d32555b0..4bd1f47f7 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -319,7 +319,7 @@ start_server {tags {"repl"}} { } } -test {slave fails full sync and diskless load swapdb recoveres it} { +test {slave fails full sync and diskless load swapdb recovers it} { start_server {tags {"repl"}} { set slave [srv 0 client] set slave_host [srv 0 host] @@ -466,3 +466,167 @@ test {diskless loading short read} { } } +# get current stime and utime metrics for a thread (since it's creation) +proc get_cpu_metrics { statfile } { + if { [ catch { + set fid [ open $statfile r ] + set data [ read $fid 1024 ] + ::close $fid + set data [ split $data ] + + ;## number of jiffies it has been scheduled... + set utime [ lindex $data 13 ] + set stime [ lindex $data 14 ] + } err ] } { + error "assertion:can't parse /proc: $err" + } + set mstime [clock milliseconds] + return [ list $mstime $utime $stime ] +} + +# compute %utime and %stime of a thread between two measurements +proc compute_cpu_usage {start end} { + set clock_ticks [exec getconf CLK_TCK] + # convert ms time to jiffies and calc delta + set dtime [ expr { ([lindex $end 0] - [lindex $start 0]) * double($clock_ticks) / 1000 } ] + set utime [ expr { [lindex $end 1] - [lindex $start 1] } ] + set stime [ expr { [lindex $end 2] - [lindex $start 2] } ] + set pucpu [ expr { ($utime / $dtime) * 100 } ] + set pscpu [ expr { ($stime / $dtime) * 100 } ] + return [ list $pucpu $pscpu ] +} + + +# test diskless rdb pipe with multiple replicas, which may drop half way +start_server {tags {"repl"}} { + set master [srv 0 client] + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 1 + set master_host [srv 0 host] + set master_port [srv 0 port] + set master_pid [srv 0 pid] + # put enough data in the db that the rdb file will be bigger than the socket buffers + # and since we'll have key-load-delay of 100, 10000 keys will take at least 1 second + # we also need the replica to process requests during transfer (which it does only once in 2mb) + $master debug populate 10000 test 10000 + $master config set rdbcompression no + # If running on Linux, we also measure utime/stime to detect possible I/O handling issues + set os [catch {exec unamee}] + set measure_time [expr {$os == "Linux"} ? 1 : 0] + foreach all_drop {no slow fast all} { + test "diskless $all_drop replicas drop during rdb pipe" { + set replicas {} + set replicas_alive {} + # start one replica that will read the rdb fast, and one that will be slow + start_server {} { + lappend replicas [srv 0 client] + lappend replicas_alive [srv 0 client] + start_server {} { + lappend replicas [srv 0 client] + lappend replicas_alive [srv 0 client] + + # start replication + # it's enough for just one replica to be slow, and have it's write handler enabled + # so that the whole rdb generation process is bound to that + [lindex $replicas 0] config set repl-diskless-load swapdb + [lindex $replicas 0] config set key-load-delay 100 + [lindex $replicas 0] replicaof $master_host $master_port + [lindex $replicas 1] replicaof $master_host $master_port + + # wait for the replicas to start reading the rdb + # using the log file since the replica only responds to INFO once in 2mb + wait_for_log_message -1 "*Loading DB in memory*" 8 800 10 + + if {$measure_time} { + set master_statfile "/proc/$master_pid/stat" + set master_start_metrics [get_cpu_metrics $master_statfile] + set start_time [clock seconds] + } + + # wait a while so that the pipe socket writer will be + # blocked on write (since replica 0 is slow to read from the socket) + after 500 + + # add some command to be present in the command stream after the rdb. + $master incr $all_drop + + # disconnect replicas depending on the current test + if {$all_drop == "all" || $all_drop == "fast"} { + exec kill [srv 0 pid] + set replicas_alive [lreplace $replicas_alive 1 1] + } + if {$all_drop == "all" || $all_drop == "slow"} { + exec kill [srv -1 pid] + set replicas_alive [lreplace $replicas_alive 0 0] + } + + # wait for rdb child to exit + wait_for_condition 500 100 { + [s -2 rdb_bgsave_in_progress] == 0 + } else { + fail "rdb child didn't terminate" + } + + # make sure we got what we were aiming for, by looking for the message in the log file + if {$all_drop == "all"} { + wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" 12 1 1 + } + if {$all_drop == "no"} { + wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" 12 1 1 + } + if {$all_drop == "slow" || $all_drop == "fast"} { + wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" 12 1 1 + } + + # make sure we don't have a busy loop going thought epoll_wait + if {$measure_time} { + set master_end_metrics [get_cpu_metrics $master_statfile] + set time_elapsed [expr {[clock seconds]-$start_time}] + set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics] + set master_utime [lindex $master_cpu 0] + set master_stime [lindex $master_cpu 1] + if {$::verbose} { + puts "elapsed: $time_elapsed" + puts "master utime: $master_utime" + puts "master stime: $master_stime" + } + if {$all_drop == "all" || $all_drop == "slow"} { + assert {$master_utime < 70} + assert {$master_stime < 70} + } + if {$all_drop == "none" || $all_drop == "fast"} { + assert {$master_utime < 15} + assert {$master_stime < 15} + } + } + + # verify the data integrity + foreach replica $replicas_alive { + # Wait that replicas acknowledge they are online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. + wait_for_condition 50 100 { + [lindex [$replica role] 3] eq {connected} + } else { + fail "replicas still not connected after some time" + } + + # Make sure that replicas and master have same + # number of keys + wait_for_condition 50 100 { + [$master dbsize] == [$replica dbsize] + } else { + fail "Different number of keys between master and replicas after too long time." + } + + # Check digests + set digest [$master debug digest] + set digest0 [$replica debug digest] + assert {$digest ne 0000000000000000000000000000000000000000} + assert {$digest eq $digest0} + } + } + } + } + } +} |