summaryrefslogtreecommitdiff
path: root/tests/integration/replication.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'tests/integration/replication.tcl')
-rw-r--r--tests/integration/replication.tcl166
1 files changed, 165 insertions, 1 deletions
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index 5d32555b0..4bd1f47f7 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -319,7 +319,7 @@ start_server {tags {"repl"}} {
}
}
-test {slave fails full sync and diskless load swapdb recoveres it} {
+test {slave fails full sync and diskless load swapdb recovers it} {
start_server {tags {"repl"}} {
set slave [srv 0 client]
set slave_host [srv 0 host]
@@ -466,3 +466,167 @@ test {diskless loading short read} {
}
}
+# Get the current utime and stime metrics for a thread (since its creation).
+#
+# Arguments:
+#   statfile - path of a Linux proc "stat" file (e.g. /proc/<pid>/stat)
+#
+# Returns a list of three elements: the current time in milliseconds,
+# utime and stime (both in clock ticks, as reported by the stat file).
+#
+# Raises an error prefixed with "assertion:can't parse /proc:" when the file
+# can't be read or doesn't contain numeric utime/stime fields.
+proc get_cpu_metrics { statfile } {
+    if { [ catch {
+        set fid [ open $statfile r ]
+        # always close the file, even when the read itself fails
+        set read_failed [ catch { read $fid 1024 } data ]
+        ::close $fid
+        if { $read_failed } {
+            error $data
+        }
+
+        # the 2nd field (comm) is wrapped in parentheses and may contain
+        # spaces, so only tokenize what follows the last ")"
+        set comm_end [ string last ")" $data ]
+        if { $comm_end < 0 } {
+            error "unexpected format in $statfile"
+        }
+        set tail [ string range $data [ expr { $comm_end + 1 } ] end ]
+        set fields [ regexp -all -inline {\S+} $tail ]
+
+        # number of jiffies it has been scheduled...
+        # fields starts at the 3rd stat field (state), so utime (14th)
+        # and stime (15th) are found at indexes 11 and 12
+        set utime [ lindex $fields 11 ]
+        set stime [ lindex $fields 12 ]
+        if { ![ string is digit -strict $utime ] || ![ string is digit -strict $stime ] } {
+            error "missing utime/stime in $statfile"
+        }
+    } err ] } {
+        error "assertion:can't parse /proc: $err"
+    }
+    set mstime [clock milliseconds]
+    return [ list $mstime $utime $stime ]
+}
+
+# Compute the %utime and %stime of a thread between two measurements.
+#
+# Arguments:
+#   start - earlier sample, a list of {mstime utime stime}
+#   end   - later sample, same layout as start
+#
+# Returns a list of two elements: the user and the system CPU usage, each as
+# a percentage of the wall-clock time that elapsed between the two samples.
+proc compute_cpu_usage {start end} {
+    set ticks_per_sec [exec getconf CLK_TCK]
+    lassign $start t0 u0 s0
+    lassign $end t1 u1 s1
+    # wall-clock delta, converted from milliseconds to jiffies
+    set elapsed_jiffies [expr {($t1 - $t0) * double($ticks_per_sec) / 1000}]
+    set user_jiffies [expr {$u1 - $u0}]
+    set sys_jiffies [expr {$s1 - $s0}]
+    set user_pct [expr {($user_jiffies / $elapsed_jiffies) * 100}]
+    set sys_pct [expr {($sys_jiffies / $elapsed_jiffies) * 100}]
+    return [list $user_pct $sys_pct]
+}
+
+
+# test diskless rdb pipe with multiple replicas, which may drop half way
+#
+# For each scenario in {no slow fast all} the master feeds two replicas over
+# the diskless rdb pipe, optionally kills the slow and/or the fast one in the
+# middle of the transfer, and then verifies the master log message, the
+# master CPU usage (Linux only) and the data of the surviving replicas.
+start_server {tags {"repl"}} {
+    set master [srv 0 client]
+    $master config set repl-diskless-sync yes
+    $master config set repl-diskless-sync-delay 1
+    set master_host [srv 0 host]
+    set master_port [srv 0 port]
+    set master_pid [srv 0 pid]
+    # put enough data in the db that the rdb file will be bigger than the socket buffers
+    # and since we'll have key-load-delay of 100, 10000 keys will take at least 1 second
+    # we also need the replica to process requests during transfer (which it does only once in 2mb)
+    $master debug populate 10000 test 10000
+    $master config set rdbcompression no
+    # If running on Linux, we also measure utime/stime to detect possible I/O handling issues.
+    # [catch] returns a status code, so the uname output is captured through its
+    # result variable; when uname can't be executed we just don't measure.
+    if {[catch {exec uname} os]} {
+        set os ""
+    }
+    set measure_time [expr {$os eq "Linux"}]
+    foreach all_drop {no slow fast all} {
+        test "diskless $all_drop replicas drop during rdb pipe" {
+            set replicas {}
+            set replicas_alive {}
+            # start one replica that will read the rdb fast, and one that will be slow
+            # (replica 0 is the slow one at srv -1, replica 1 is the fast one at srv 0,
+            # and the master is at srv -2)
+            start_server {} {
+                lappend replicas [srv 0 client]
+                lappend replicas_alive [srv 0 client]
+                start_server {} {
+                    lappend replicas [srv 0 client]
+                    lappend replicas_alive [srv 0 client]
+
+                    # start replication
+                    # it's enough for just one replica to be slow, and have its write handler enabled
+                    # so that the whole rdb generation process is bound to that
+                    [lindex $replicas 0] config set repl-diskless-load swapdb
+                    [lindex $replicas 0] config set key-load-delay 100
+                    [lindex $replicas 0] replicaof $master_host $master_port
+                    [lindex $replicas 1] replicaof $master_host $master_port
+
+                    # wait for the replicas to start reading the rdb
+                    # using the log file since the replica only responds to INFO once in 2mb
+                    wait_for_log_message -1 "*Loading DB in memory*" 8 800 10
+
+                    if {$measure_time} {
+                        set master_statfile "/proc/$master_pid/stat"
+                        set master_start_metrics [get_cpu_metrics $master_statfile]
+                        set start_time [clock seconds]
+                    }
+
+                    # wait a while so that the pipe socket writer will be
+                    # blocked on write (since replica 0 is slow to read from the socket)
+                    after 500
+
+                    # add some command to be present in the command stream after the rdb.
+                    $master incr $all_drop
+
+                    # disconnect replicas depending on the current test
+                    if {$all_drop == "all" || $all_drop == "fast"} {
+                        exec kill [srv 0 pid]
+                        set replicas_alive [lreplace $replicas_alive 1 1]
+                    }
+                    if {$all_drop == "all" || $all_drop == "slow"} {
+                        exec kill [srv -1 pid]
+                        set replicas_alive [lreplace $replicas_alive 0 0]
+                    }
+
+                    # wait for rdb child to exit
+                    wait_for_condition 500 100 {
+                        [s -2 rdb_bgsave_in_progress] == 0
+                    } else {
+                        fail "rdb child didn't terminate"
+                    }
+
+                    # make sure we got what we were aiming for, by looking for the message in the log file
+                    if {$all_drop == "all"} {
+                        wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" 12 1 1
+                    }
+                    if {$all_drop == "no"} {
+                        wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" 12 1 1
+                    }
+                    if {$all_drop == "slow" || $all_drop == "fast"} {
+                        wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" 12 1 1
+                    }
+
+                    # make sure we don't have a busy loop going through epoll_wait
+                    if {$measure_time} {
+                        set master_end_metrics [get_cpu_metrics $master_statfile]
+                        set time_elapsed [expr {[clock seconds]-$start_time}]
+                        set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
+                        set master_utime [lindex $master_cpu 0]
+                        set master_stime [lindex $master_cpu 1]
+                        if {$::verbose} {
+                            puts "elapsed: $time_elapsed"
+                            puts "master utime: $master_utime"
+                            puts "master stime: $master_stime"
+                        }
+                        if {$all_drop == "all" || $all_drop == "slow"} {
+                            assert {$master_utime < 70}
+                            assert {$master_stime < 70}
+                        }
+                        # the scenario without any drop is named "no" in the foreach list above
+                        if {$all_drop == "no" || $all_drop == "fast"} {
+                            assert {$master_utime < 15}
+                            assert {$master_stime < 15}
+                        }
+                    }
+
+                    # verify the data integrity
+                    foreach replica $replicas_alive {
+                        # Wait that replicas acknowledge they are online so
+                        # we are sure that DBSIZE and DEBUG DIGEST will not
+                        # fail because of timing issues.
+                        wait_for_condition 50 100 {
+                            [lindex [$replica role] 3] eq {connected}
+                        } else {
+                            fail "replicas still not connected after some time"
+                        }
+
+                        # Make sure that replicas and master have same
+                        # number of keys
+                        wait_for_condition 50 100 {
+                            [$master dbsize] == [$replica dbsize]
+                        } else {
+                            fail "Different number of keys between master and replicas after too long time."
+                        }
+
+                        # Check digests
+                        set digest [$master debug digest]
+                        set digest0 [$replica debug digest]
+                        assert {$digest ne 0000000000000000000000000000000000000000}
+                        assert {$digest eq $digest0}
+                    }
+                }
+            }
+        }
+    }
+}