Re-design cluster link send buffer to improve memory management (#11343)

Re-design cluster link send queue to improve memory management
author: Brennan <31714723+BCathcart@users.noreply.github.com> 2022-11-01 22:26:44 -0400
committer: GitHub <noreply@github.com> 2022-11-01 19:26:44 -0700
commit: 47c493e070c8ac59ccc34d694700bca8ec517fbc (patch)
tree: 3badf9ecdb5b62ceece2e317a90cf2d472559aa5 /tests/cluster
parent: 4a8a62505134094f8de0ca293da932c3d009df15 (diff)
download: redis-47c493e070c8ac59ccc34d694700bca8ec517fbc.tar.gz
2 files changed, 0 insertions, 183 deletions
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
index 9c669e128..056bfc311 100644
--- a/tests/cluster/cluster.tcl
+++ b/tests/cluster/cluster.tcl
@@ -217,72 +217,3 @@ proc are_hostnames_propagated {match_string} {
     }
     return 1
 }
-
-# Returns a parsed CLUSTER LINKS output of the instance identified
-# by the given `id` as a list of dictionaries, with each dictionary
-# corresponds to a link.
-proc get_cluster_links id {
-    set lines [R $id cluster links]
-    set links {}
-    foreach l $lines {
-        if {$l eq {}} continue
-        assert_equal [llength $l] 12
-        assert_equal [lindex $l 0] "direction"
-        set dir [lindex $l 1]
-        assert_equal [lindex $l 2] "node"
-        set node [lindex $l 3]
-        assert_equal [lindex $l 4] "create-time"
-        set create_time [lindex $l 5]
-        assert_equal [lindex $l 6] "events"
-        set events [lindex $l 7]
-        assert_equal [lindex $l 8] "send-buffer-allocated"
-        set send_buffer_allocated [lindex $l 9]
-        assert_equal [lindex $l 10] "send-buffer-used"
-        set send_buffer_used [lindex $l 11]
-        set link [dict create \
-            dir $dir \
-            node $node \
-            create_time $create_time \
-            events $events \
-            send_buffer_allocated $send_buffer_allocated \
-            send_buffer_used $send_buffer_used \
-        ]
-        lappend links $link
-    }
-    return $links
-}
-
-proc get_links_with_peer {this_instance_id peer_nodename} {
-    set links [get_cluster_links $this_instance_id]
-    set links_with_peer {}
-    foreach l $links {
-        if {[dict get $l node] eq $peer_nodename} {
-            lappend links_with_peer $l
-        }
-    }
-    return $links_with_peer
-}
-
-# Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that
-# corresponds to the link established toward a peer identified by `peer_nodename`
-proc get_link_to_peer {this_instance_id peer_nodename} {
-    set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename]
-    foreach l $links_with_peer {
-        if {[dict get $l dir] eq "to"} {
-            return $l
-        }
-    }
-    return {}
-}
-
-# Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that
-# corresponds to the link accepted from a peer identified by `peer_nodename`
-proc get_link_from_peer {this_instance_id peer_nodename} {
-    set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename]
-    foreach l $links_with_peer {
-        if {[dict get $l dir] eq "from"} {
-            return $l
-        }
-    }
-    return {}
-}
-\ No newline at end of file
diff --git a/tests/cluster/tests/24-links.tcl b/tests/cluster/tests/24-links.tcl
deleted file mode 100644
index d0ddea200..000000000
--- a/tests/cluster/tests/24-links.tcl
+++ /dev/null
@@ -1,114 +0,0 @@
-source "../tests/includes/init-tests.tcl"
-
-test "Create a cluster with two single-node shards" {
-    create_cluster 2 0
-}
-
-test "Cluster should start ok" {
-    assert_cluster_state ok
-}
-
-proc number_of_peers {id} {
-    expr [llength [get_cluster_nodes $id]] - 1
-}
-
-proc number_of_links {id} {
-    llength [get_cluster_links $id]
-}
-
-test "Each node has two links with each peer" {
-    foreach_redis_id id {
-        # Assert that from point of view of each node, there are two links for
-        # each peer. It might take a while for cluster to stabilize so wait up
-        # to 5 seconds.
-        wait_for_condition 50 100 {
-            [number_of_peers $id]*2 == [number_of_links $id]
-        } else {
-            assert_equal [expr [number_of_peers $id]*2] [number_of_links $id]
-        }
-
-        set nodes [get_cluster_nodes $id]
-        set links [get_cluster_links $id]
-
-        # For each peer there should be exactly one
-        # link "to" it and one link "from" it.
-        foreach n $nodes {
-            if {[has_flag $n myself]} continue
-            set peer [dict get $n id]
-            set to 0
-            set from 0
-            foreach l $links {
-                if {[dict get $l node] eq $peer} {
-                    if {[dict get $l dir] eq "to"} {
-                        incr to
-                    } elseif {[dict get $l dir] eq "from"} {
-                        incr from
-                    }
-                }
-            }
-            assert {$to eq 1}
-            assert {$from eq 1}
-        }
-    }
-}
-
-set primary1_id 0
-set primary2_id 1
-
-set primary1 [Rn $primary1_id]
-set primary2 [Rn $primary2_id]
-
-test "Disconnect link when send buffer limit reached" {
-    # On primary1, set timeout to 1 hour so links won't get disconnected due to timeouts
-    set oldtimeout [lindex [$primary1 CONFIG get cluster-node-timeout] 1]
-    $primary1 CONFIG set cluster-node-timeout [expr 60*60*1000]
-
-    # Get primary1's links with primary2
-    set primary2_name [dict get [get_myself $primary2_id] id]
-    set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
-    set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
-
-    # On primary1, set cluster link send buffer limit to 256KB, which is large enough to not be
-    # overflowed by regular gossip messages but also small enough that it doesn't take too much
-    # memory to overflow it. If it is set too high, Redis may get OOM killed by kernel before this
-    # limit is overflowed in some RAM-limited test environments.
-    set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1]
-    $primary1 CONFIG set cluster-link-sendbuf-limit [expr 256*1024]
-    assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 0}
-
-    # To manufacture an ever-growing send buffer from primary1 to primary2,
-    # make primary2 unresponsive.
-    set primary2_pid [get_instance_attrib redis $primary2_id pid]
-    exec kill -SIGSTOP $primary2_pid
-
-    # On primary1, send 128KB Pubsub messages in a loop until the send buffer of the link from
-    # primary1 to primary2 exceeds buffer limit therefore be dropped.
-    # For the send buffer to grow, we need to first exhaust TCP send buffer of primary1 and TCP
-    # receive buffer of primary2 first. The sizes of these two buffers vary by OS, but 100 128KB
-    # messages should be sufficient.
-    set i 0
-    wait_for_condition 100 0 {
-        [catch {incr i} e] == 0 &&
-        [catch {$primary1 publish channel [prepare_value [expr 128*1024]]} e] == 0 &&
-        [catch {after 500} e] == 0 &&
-        [get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] >= 1
-    } else {
-        fail "Cluster link not freed as expected"
-    }
-    puts -nonewline "$i 128KB messages needed to overflow 256KB buffer limit. "
-
-    # A new link to primary2 should have been recreated
-    set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
-    assert {[dict get $new_link_p1_to_p2 create_time] > [dict get $orig_link_p1_to_p2 create_time]}
-
-    # Link from primary2 should not be affected
-    set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
-    assert {[dict get $same_link_p1_from_p2 create_time] eq [dict get $orig_link_p1_from_p2 create_time]}
-
-    # Revive primary2
-    exec kill -SIGCONT $primary2_pid
-
-    # Reset configs on primary1 so config changes don't leak out to other tests
-    $primary1 CONFIG set cluster-node-timeout $oldtimeout
-    $primary1 CONFIG set cluster-link-sendbuf-limit $oldlimit
-}
author	Brennan <31714723+BCathcart@users.noreply.github.com>	2022-11-01 22:26:44 -0400
committer	GitHub <noreply@github.com>	2022-11-01 19:26:44 -0700
commit	47c493e070c8ac59ccc34d694700bca8ec517fbc (patch)
tree	3badf9ecdb5b62ceece2e317a90cf2d472559aa5 /tests/cluster
parent	4a8a62505134094f8de0ca293da932c3d009df15 (diff)
download	redis-47c493e070c8ac59ccc34d694700bca8ec517fbc.tar.gz