summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorxbasel <103044017+xbasel@users.noreply.github.com>2023-03-12 19:47:06 +0200
committerGitHub <noreply@github.com>2023-03-12 19:47:06 +0200
commit7be7834e6573134b1e5a0b8ea90acf856b0c7774 (patch)
tree79b2b2b961c32f1ebc3b0e63a11d0a5daf0f6ff4 /tests
parent08cd3bf2924da099facdc1fa30706c93ad3b4871 (diff)
downloadredis-7be7834e6573134b1e5a0b8ea90acf856b0c7774.tar.gz
Large blocks of replica client output buffer could lead to psync loops and unnecessary memory usage (#11666)
This can happen when a key almost equal or larger than the client output buffer limit of the replica is written. Example: 1. DB is empty 2. Backlog size is 1 MB 3. Client out put buffer limit is 2 MB 4. Client writes a 3 MB key 5. The shared replication buffer will have a single node which contains the key written above, and it exceeds the backlog size. At this point the client output buffer usage calculation will report the replica buffer to be 3 MB (or more) even after sending all the data to the replica. The primary drops the replica connection for exceeding the limits, the replica reconnects and successfully executes partial sync but the primary will drop the connection again because the buffer usage is still 3 MB. This happens over and over. To mitigate the problem, this fix limits the maximum size of a single backlog node to be (repl_backlog_size/16). This way a single node can't exceed the limits of the COB (the COB has to be larger than the backlog). It also means that if the backlog has some excessive data it can't trim, it would be at most about 6% overuse. other notes: 1. a loop was added in feedReplicationBuffer which caused a massive LOC change due to indentation, the actual changes are just the `min(max` and the loop. 3. an unrelated change in an existing test to speed up a server termination which took 10 seconds. Co-authored-by: Oran Agra <oran@redislabs.com>
Diffstat (limited to 'tests')
-rw-r--r--tests/integration/replication-buffer.tcl57
1 files changed, 57 insertions, 0 deletions
diff --git a/tests/integration/replication-buffer.tcl b/tests/integration/replication-buffer.tcl
index 07e2cbc1b..fe85632ae 100644
--- a/tests/integration/replication-buffer.tcl
+++ b/tests/integration/replication-buffer.tcl
@@ -180,6 +180,8 @@ start_server {} {
}
exec kill -SIGCONT $replica2_pid
}
+ # speed up termination
+ $master config set shutdown-timeout 0
}
}
}
@@ -227,3 +229,58 @@ test {Partial resynchronization is successful even client-output-buffer-limit is
}
}
}
+
+# This test was added to make sure big keys added to the backlog do not trigger psync loop.
+test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} {
+ proc client_field {r type f} {
+ set client [$r client list type $type]
+ if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} {
+ error "field $f not found for in $client"
+ }
+ return $res
+ }
+
+ start_server {tags {"repl external:skip"}} {
+ start_server {} {
+ set replica [srv -1 client]
+ set replica_host [srv -1 host]
+ set replica_port [srv -1 port]
+ set master [srv 0 client]
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+
+ $master config set repl-backlog-size 16384
+ $master config set client-output-buffer-limit "replica 32768 32768 60"
+ # Key has has to be larger than replica client-output-buffer limit.
+ set keysize [expr 256*1024]
+
+ $replica replicaof $master_host $master_port
+ wait_for_condition 50 100 {
+ [lindex [$replica role] 0] eq {slave} &&
+ [string match {*master_link_status:up*} [$replica info replication]]
+ } else {
+ fail "Can't turn the instance into a replica"
+ }
+
+ set _v [prepare_value $keysize]
+ $master set key $_v
+ wait_for_ofs_sync $master $replica
+
+ # Write another key to force the test to wait for another event loop iteration
+ # to give the serverCron a chance to disconnect replicas with COB size exeeeding the limits
+ $master set key1 "1"
+ wait_for_ofs_sync $master $replica
+
+ assert {[status $master connected_slaves] == 1}
+
+ wait_for_condition 50 100 {
+ [client_field $master replica tot-mem] < $keysize
+ } else {
+ fail "replica client-output-buffer usage is higher than expected."
+ }
+
+ assert {[status $master sync_partial_ok] == 0}
+ }
+ }
+}
+