diff options
Diffstat (limited to 'tests/cluster')
-rw-r--r-- | tests/cluster/cluster.tcl | 15 | ||||
-rw-r--r-- | tests/cluster/tests/00-base.tcl | 2 | ||||
-rw-r--r-- | tests/cluster/tests/11-manual-takeover.tcl | 24 | ||||
-rw-r--r-- | tests/cluster/tests/12-replica-migration-2.tcl | 7 | ||||
-rw-r--r-- | tests/cluster/tests/27-endpoints.tcl | 18 | ||||
-rw-r--r-- | tests/cluster/tests/28-cluster-shards.tcl | 185 |
6 files changed, 226 insertions, 25 deletions
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl index 531e90d6c..9c669e128 100644 --- a/tests/cluster/cluster.tcl +++ b/tests/cluster/cluster.tcl @@ -203,6 +203,21 @@ proc wait_for_cluster_propagation {} { } } +# Check if cluster's view of hostnames is consistent +proc are_hostnames_propagated {match_string} { + for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} { + set cfg [R $j cluster slots] + foreach node $cfg { + for {set i 2} {$i < [llength $node]} {incr i} { + if {! [string match $match_string [lindex [lindex [lindex $node $i] 3] 1]] } { + return 0 + } + } + } + } + return 1 +} + # Returns a parsed CLUSTER LINKS output of the instance identified # by the given `id` as a list of dictionaries, with each dictionary # corresponds to a link. diff --git a/tests/cluster/tests/00-base.tcl b/tests/cluster/tests/00-base.tcl index 656128e53..12d8244a8 100644 --- a/tests/cluster/tests/00-base.tcl +++ b/tests/cluster/tests/00-base.tcl @@ -64,7 +64,7 @@ test "It is possible to write and read from the cluster" { } test "Function no-cluster flag" { - R 1 function load lua test { + R 1 function load {#!lua name=test redis.register_function{function_name='f1', callback=function() return 'hello' end, flags={'no-cluster'}} } catch {R 1 fcall f1 0} e diff --git a/tests/cluster/tests/11-manual-takeover.tcl b/tests/cluster/tests/11-manual-takeover.tcl index f567c6962..78a0f858b 100644 --- a/tests/cluster/tests/11-manual-takeover.tcl +++ b/tests/cluster/tests/11-manual-takeover.tcl @@ -14,20 +14,32 @@ test "Cluster is writable" { cluster_write_test 0 } +# For this test, disable replica failover until +# all of the primaries are confirmed killed. Otherwise +# there might be enough time to elect a replica. 
+set replica_ids { 5 6 7 } +foreach id $replica_ids { + R $id config set cluster-replica-no-failover yes +} + test "Killing majority of master nodes" { kill_instance redis 0 kill_instance redis 1 kill_instance redis 2 } +foreach id $replica_ids { + R $id config set cluster-replica-no-failover no +} + test "Cluster should eventually be down" { assert_cluster_state fail } test "Use takeover to bring slaves back" { - R 5 cluster failover takeover - R 6 cluster failover takeover - R 7 cluster failover takeover + foreach id $replica_ids { + R $id cluster failover takeover + } } test "Cluster should eventually be up again" { @@ -39,9 +51,9 @@ test "Cluster is writable" { } test "Instance #5, #6, #7 are now masters" { - assert {[RI 5 role] eq {master}} - assert {[RI 6 role] eq {master}} - assert {[RI 7 role] eq {master}} + foreach id $replica_ids { + assert {[RI $id role] eq {master}} + } } test "Restarting the previously killed master nodes" { diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index f0493e57e..ed680061c 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -45,11 +45,12 @@ test "Resharding all the master #0 slots away from it" { } -test "Master #0 should lose its replicas" { +test "Master #0 who lost all slots should turn into a replica without replicas" { wait_for_condition 1000 50 { - [llength [lindex [R 0 role] 2]] == 0 + [RI 0 role] == "slave" && [RI 0 connected_slaves] == 0 } else { - fail "Master #0 still has replicas" + puts [R 0 info replication] + fail "Master #0 didn't turn itself into a replica" } } diff --git a/tests/cluster/tests/27-endpoints.tcl b/tests/cluster/tests/27-endpoints.tcl index 4010b92ed..32e3e794d 100644 --- a/tests/cluster/tests/27-endpoints.tcl +++ b/tests/cluster/tests/27-endpoints.tcl @@ -1,20 +1,5 @@ source "../tests/includes/init-tests.tcl" -# Check if cluster's view of hostnames is consistent -proc 
are_hostnames_propagated {match_string} { - for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} { - set cfg [R $j cluster slots] - foreach node $cfg { - for {set i 2} {$i < [llength $node]} {incr i} { - if {! [string match $match_string [lindex [lindex [lindex $node $i] 3] 1]] } { - return 0 - } - } - } - } - return 1 -} - # Isolate a node from the cluster and give it a new nodeid proc isolate_node {id} { set node_id [R $id CLUSTER MYID] @@ -212,6 +197,9 @@ test "Verify the nodes configured with prefer hostname only show hostname for ne test "Test restart will keep hostname information" { # Set a new hostname, reboot and make sure it sticks R 0 config set cluster-announce-hostname "restart-1.com" + # Store the hostname in the config + R 0 config rewrite + kill_instance redis 0 restart_instance redis 0 set slot_result [R 0 CLUSTER SLOTS] assert_equal [lindex [get_slot_field $slot_result 0 2 3] 1] "restart-1.com" diff --git a/tests/cluster/tests/28-cluster-shards.tcl b/tests/cluster/tests/28-cluster-shards.tcl new file mode 100644 index 000000000..fe794f2b7 --- /dev/null +++ b/tests/cluster/tests/28-cluster-shards.tcl @@ -0,0 +1,185 @@ +source "../tests/includes/init-tests.tcl" + +# Initial slot distribution. +set ::slot0 [list 0 1000 1002 5459 5461 5461 10926 10926] +set ::slot1 [list 5460 5460 5462 10922 10925 10925] +set ::slot2 [list 10923 10924 10927 16383] +set ::slot3 [list 1001 1001] + +proc cluster_create_with_split_slots {masters replicas} { + for {set j 0} {$j < $masters} {incr j} { + R $j cluster ADDSLOTSRANGE {*}[set ::slot${j}] + } + if {$replicas} { + cluster_allocate_slaves $masters $replicas + } + set ::cluster_master_nodes $masters + set ::cluster_replica_nodes $replicas +} + +# Get the node info with the specific node_id from the +# given reference node. 
Valid type options are "node" and "shard" +proc get_node_info_from_shard {id reference {type node}} { + set shards_response [R $reference CLUSTER SHARDS] + foreach shard_response $shards_response { + set nodes [dict get $shard_response nodes] + foreach node $nodes { + if {[dict get $node id] eq $id} { + if {$type eq "node"} { + return $node + } elseif {$type eq "shard"} { + return $shard_response + } else { + return {} + } + } + } + } + # No shard found, return nothing + return {} +} + +test "Create a 8 nodes cluster with 4 shards" { + cluster_create_with_split_slots 4 4 +} + +test "Cluster should start ok" { + assert_cluster_state ok +} + +test "Set cluster hostnames and verify they are propagated" { + for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} { + R $j config set cluster-announce-hostname "host-$j.com" + } + + # Wait for everyone to agree about the state + wait_for_cluster_propagation +} + +test "Verify information about the shards" { + set ids {} + for {set j 0} {$j < $::cluster_master_nodes + $::cluster_replica_nodes} {incr j} { + lappend ids [R $j CLUSTER MYID] + } + set slots [list $::slot0 $::slot1 $::slot2 $::slot3 $::slot0 $::slot1 $::slot2 $::slot3] + + # Verify on each node (primary/replica), the response of the `CLUSTER SHARDS` command is consistent. + for {set ref 0} {$ref < $::cluster_master_nodes + $::cluster_replica_nodes} {incr ref} { + for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} { + assert_equal [lindex $slots $i] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "shard"] slots] + assert_equal "host-$i.com" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] hostname] + assert_equal "127.0.0.1" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] ip] + # Default value of 'cluster-preferred-endpoint-type' is ip. 
+ assert_equal "127.0.0.1" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] endpoint] + + if {$::tls} { + assert_equal [get_instance_attrib redis $i plaintext-port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port] + assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] tls-port] + } else { + assert_equal [get_instance_attrib redis $i port] [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] port] + } + + if {$i < 4} { + assert_equal "master" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role] + assert_equal "online" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] health] + } else { + assert_equal "replica" [dict get [get_node_info_from_shard [lindex $ids $i] $ref "node"] role] + # Replica could be in online or loading + } + } + } +} + +test "Verify no slot shard" { + # Node 8 has no slots assigned + set node_8_id [R 8 CLUSTER MYID] + assert_equal {} [dict get [get_node_info_from_shard $node_8_id 8 "shard"] slots] + assert_equal {} [dict get [get_node_info_from_shard $node_8_id 0 "shard"] slots] +} + +set node_0_id [R 0 CLUSTER MYID] + +test "Kill a node and tell the replica to immediately takeover" { + kill_instance redis 0 + R 4 cluster failover force +} + +# Primary 0 node should report as fail, wait until the new primary acknowledges it. 
+test "Verify health as fail for killed node" { + wait_for_condition 50 100 { + "fail" eq [dict get [get_node_info_from_shard $node_0_id 4 "node"] "health"] + } else { + fail "New primary never detected the node failed" + } +} + +set primary_id 4 +set replica_id 0 + +test "Restarting primary node" { + restart_instance redis $replica_id +} + +test "Instance #0 gets converted into a replica" { + wait_for_condition 1000 50 { + [RI $replica_id role] eq {slave} + } else { + fail "Old primary was not converted into replica" + } +} + +test "Test the replica reports a loading state while it's loading" { + # Test the command is good for verifying everything moves to a happy state + set replica_cluster_id [R $replica_id CLUSTER MYID] + wait_for_condition 50 1000 { + [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health] eq "online" + } else { + fail "Replica never transitioned to online" + } + + # Set 1 MB of data, so there is something to load on full sync + R $primary_id debug populate 1000 key 1000 + + # Kill replica client for primary and load new data to the primary + R $primary_id config set repl-backlog-size 100 + + # Set the key load delay so that it will take at least + # 2 seconds to fully load the data. + R $replica_id config set key-load-delay 4000 + + # Trigger event loop processing every 1024 bytes, this trigger + # allows us to send and receive cluster messages, so we are setting + # it low so that the cluster messages are sent more frequently. + R $replica_id config set loading-process-events-interval-bytes 1024 + + R $primary_id multi + R $primary_id client kill type replica + # populate the correct data + set num 100 + set value [string repeat A 1024] + for {set j 0} {$j < $num} {incr j} { + # Use hashtag valid for shard #0 + set key "{ch3}$j" + R $primary_id set $key $value + } + R $primary_id exec + + # The replica should reconnect and start a full sync; it will gossip about its health to the primary. 
+ wait_for_condition 50 1000 { + "loading" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health] + } else { + fail "Replica never transitioned to loading" + } + + # Speed up the key loading and verify everything resumes + R $replica_id config set key-load-delay 0 + + wait_for_condition 50 1000 { + "online" eq [dict get [get_node_info_from_shard $replica_cluster_id $primary_id "node"] health] + } else { + fail "Replica never transitioned to online" + } + + # Final sanity, the replica agrees it is online. + assert_equal "online" [dict get [get_node_info_from_shard $replica_cluster_id $replica_id "node"] health] +}
\ No newline at end of file |