diff options
author | ny0312 <49037844+ny0312@users.noreply.github.com> | 2021-12-16 21:56:59 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-16 21:56:59 -0800 |
commit | 792afb443211f190b3f8bea15e945661453fbddf (patch) | |
tree | ae3a50c8adc9ed0a3adb3d1093e7f0129b469a2b /tests | |
parent | 687210f1550cf9048bed5f5539c9411fb22cd3b0 (diff) | |
download | redis-792afb443211f190b3f8bea15e945661453fbddf.tar.gz |
Introduce memory management on cluster link buffers (#9774)
Introduce memory management on cluster link buffers:
* Introduce a new `cluster-link-sendbuf-limit` config that caps memory usage of cluster bus link send buffers.
* Introduce a new `CLUSTER LINKS` command that displays current TCP links to/from peers.
* Introduce a new `mem_cluster_links` field under `INFO` command output, which displays the overall memory usage by all current cluster links.
* Introduce a new `total_cluster_links_buffer_limit_exceeded` field under `CLUSTER INFO` command output, which displays the accumulated count of cluster links freed due to `cluster-link-sendbuf-limit`.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/cluster/cluster.tcl | 69 | ||||
-rw-r--r-- | tests/cluster/tests/24-links.tcl | 99 | ||||
-rw-r--r-- | tests/support/util.tcl | 8 | ||||
-rw-r--r-- | tests/unit/pendingquerybuf.tcl | 8 |
4 files changed, 176 insertions, 8 deletions
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl index e95789282..7b7ce5343 100644 --- a/tests/cluster/cluster.tcl +++ b/tests/cluster/cluster.tcl @@ -175,3 +175,72 @@ proc wait_for_cluster_propagation {} { fail "cluster config did not reach a consistent state" } } + +# Returns a parsed CLUSTER LINKS output of the instance identified +# by the given `id` as a list of dictionaries, with each dictionary +# corresponding to a link. +proc get_cluster_links id { + set lines [R $id cluster links] + set links {} + foreach l $lines { + if {$l eq {}} continue + assert_equal [llength $l] 12 + assert_equal [lindex $l 0] "direction" + set dir [lindex $l 1] + assert_equal [lindex $l 2] "node" + set node [lindex $l 3] + assert_equal [lindex $l 4] "create-time" + set create_time [lindex $l 5] + assert_equal [lindex $l 6] "events" + set events [lindex $l 7] + assert_equal [lindex $l 8] "send-buffer-allocated" + set send_buffer_allocated [lindex $l 9] + assert_equal [lindex $l 10] "send-buffer-used" + set send_buffer_used [lindex $l 11] + set link [dict create \ + dir $dir \ + node $node \ + create_time $create_time \ + events $events \ + send_buffer_allocated $send_buffer_allocated \ + send_buffer_used $send_buffer_used \ + ] + lappend links $link + } + return $links +} + +proc get_links_with_peer {this_instance_id peer_nodename} { + set links [get_cluster_links $this_instance_id] + set links_with_peer {} + foreach l $links { + if {[dict get $l node] eq $peer_nodename} { + lappend links_with_peer $l + } + } + return $links_with_peer +} + +# Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that +# corresponds to the link established toward a peer identified by `peer_nodename` +proc get_link_to_peer {this_instance_id peer_nodename} { + set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename] + foreach l $links_with_peer { + if {[dict get $l dir] eq "to"} { + return $l + } + } + return {} +} + +# Return the entry 
in CLUSTER LINKS output by instance identified by `this_instance_id` that +# corresponds to the link accepted from a peer identified by `peer_nodename` +proc get_link_from_peer {this_instance_id peer_nodename} { + set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename] + foreach l $links_with_peer { + if {[dict get $l dir] eq "from"} { + return $l + } + } + return {} +} diff --git a/tests/cluster/tests/24-links.tcl b/tests/cluster/tests/24-links.tcl new file mode 100644 index 000000000..6657a8ce4 --- /dev/null +++ b/tests/cluster/tests/24-links.tcl @@ -0,0 +1,99 @@ +source "../tests/includes/init-tests.tcl" + +test "Create a cluster with two single-node shards" { + create_cluster 2 0 +} + +test "Cluster should start ok" { + assert_cluster_state ok +} + +test "Each node has two links with each peer" { + foreach_redis_id id { + # Get number of peers, excluding myself + set nodes [get_cluster_nodes $id] + set num_peers [expr [llength $nodes] - 1] + + # Get number of links to peers + set links [get_cluster_links $id] + set num_links [llength $links] + + # Two links per peer + assert {$num_peers*2 eq $num_links} + + # For each peer there should be exactly one + # link "to" it and one link "from" it. 
+ foreach n $nodes { + if {[has_flag $n myself]} continue + set peer [dict get $n id] + set to 0 + set from 0 + foreach l $links { + if {[dict get $l node] eq $peer} { + if {[dict get $l dir] eq "to"} { + incr to + } elseif {[dict get $l dir] eq "from"} { + incr from + } + } + } + assert {$to eq 1} + assert {$from eq 1} + } + } +} + +set primary1_id 0 +set primary2_id 1 + +set primary1 [Rn $primary1_id] +set primary2 [Rn $primary2_id] + +test "Disconnect link when send buffer limit reached" { + # On primary1, set timeout to 1 hour so links won't get disconnected due to timeouts + set oldtimeout [lindex [$primary1 CONFIG get cluster-node-timeout] 1] + $primary1 CONFIG set cluster-node-timeout [expr 60*60*1000] + + # Get primary1's links with primary2 + set primary2_name [dict get [get_myself $primary2_id] id] + set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name] + set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name] + + # On primary1, set cluster link send buffer limit to 32MB + set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1] + $primary1 CONFIG set cluster-link-sendbuf-limit [expr 32*1024*1024] + assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 0} + + # To manufacture an ever-growing send buffer from primary1 to primary2, + # make primary2 unresponsive. + set primary2_pid [get_instance_attrib redis $primary2_id pid] + exec kill -SIGSTOP $primary2_pid + + # On primary1, send a 10MB Pubsub message. 
It will stay in send buffer of + # the link from primary1 to primary2 + $primary1 publish channel [prepare_value [expr 10*1024*1024]] + + # Check the same link has not been disconnected, but its send buffer has grown + set same_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name] + assert {[dict get $same_link_p1_to_p2 create_time] eq [dict get $orig_link_p1_to_p2 create_time]} + assert {[dict get $same_link_p1_to_p2 send_buffer_allocated] > [dict get $orig_link_p1_to_p2 send_buffer_allocated]} + + # On primary1, send another 30MB Pubsub message. + $primary1 publish channel [prepare_value [expr 30*1024*1024]] + + # Link has exceeded buffer limit and been dropped and recreated + set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name] + assert {[dict get $new_link_p1_to_p2 create_time] > [dict get $orig_link_p1_to_p2 create_time]} + assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 1} + + # Link from primary2 should not be affected + set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name] + assert {[dict get $same_link_p1_from_p2 create_time] eq [dict get $orig_link_p1_from_p2 create_time]} + + # Revive primary2 + exec kill -SIGCONT $primary2_pid + + # Reset configs on primary1 so config changes don't leak out to other tests + $primary1 CONFIG set cluster-node-timeout $oldtimeout + $primary1 CONFIG set cluster-link-sendbuf-limit $oldlimit +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index d97743665..08fea1faa 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -978,3 +978,11 @@ proc read_big_bulk {code {compare no} {prefix ""}} { r readraw 0 return $resp_len } + +proc prepare_value {size} { + set _v "c" + for {set i 1} {$i < $size} {incr i} { + append _v 0 + } + return $_v +} diff --git a/tests/unit/pendingquerybuf.tcl b/tests/unit/pendingquerybuf.tcl index b1c2ee0d5..c1278c8fd 100644 --- a/tests/unit/pendingquerybuf.tcl +++ 
b/tests/unit/pendingquerybuf.tcl @@ -4,14 +4,6 @@ proc info_memory {r property} { } } -proc prepare_value {size} { - set _v "c" - for {set i 1} {$i < $size} {incr i} { - append _v 0 - } - return $_v -} - start_server {tags {"wait external:skip"}} { start_server {} { set slave [srv 0 client] |