author    Ping Xie <pingxie@google.com>  2022-11-16 19:24:18 -0800
committer GitHub <noreply@github.com>    2022-11-16 19:24:18 -0800
commit    203b12e41ff7981f0fae5b23819f072d61594813 (patch)
tree      ff5f2f829bbfcc6928190a01d401ee4b1ebbaf9e /tests/cluster
parent    2168ccc661791ced6271c5e4ab0f5eb60b1559e2 (diff)
download  redis-203b12e41ff7981f0fae5b23819f072d61594813.tar.gz
Introduce Shard IDs to logically group nodes in cluster mode (#10536)
Introduce Shard IDs to logically group nodes in cluster mode.

1. Added a new "shard_id" field to "cluster nodes" output and nodes.conf after "hostname"
2. Added a new PING extension to propagate "shard_id"
3. Handled upgrade from pre-7.2 releases automatically
4. Refactored PING extension assembling/parsing logic

Behavior of Shard IDs:

Replicas will always follow the shards of their reported primaries. If a primary updates its shard ID, the replica will follow. (This need not follow for cluster v2) This is not an expected use case.
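For illustration, a "cluster nodes" line (the same format is persisted in nodes.conf) now carries the shard ID as a key=value auxiliary field after the hostname slot of the address field; every ID and address below is invented:

    07c37dfeb235213a872192d90877d0cd55635b91 127.0.0.1:30004@31004,myhost,shard-id=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b slave e7d1eecce10fd6bb5eb35b9f99a514335d9ba9ca 0 1426238317239 4 connected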
Diffstat (limited to 'tests/cluster')
-rw-r--r--  tests/cluster/cluster.tcl                  |   3
-rw-r--r--  tests/cluster/tests/28-cluster-shards.tcl  | 105
2 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl
index 056bfc311..e86d70324 100644
--- a/tests/cluster/cluster.tcl
+++ b/tests/cluster/cluster.tcl
@@ -18,7 +18,8 @@ proc get_cluster_nodes id {
         set args [split $l]
         set node [dict create \
             id [lindex $args 0] \
-            addr [lindex $args 1] \
+            addr [lindex [split [lindex $args 1] ,] 0] \
+            shard-id [lindex [split [lindex [split [lindex $args 1] ,] 2] =] 1] \
             flags [split [lindex $args 2] ,] \
             slaveof [lindex $args 3] \
             ping_sent [lindex $args 4] \
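Read outside the diff, the new extraction works on the comma-separated address token; a minimal standalone Tcl sketch (the field value is invented):

    # Second whitespace-separated field of a CLUSTER NODES line:
    # addr,hostname,shard-id=<40-hex-id>
    set field "127.0.0.1:30001@40001,myhost,shard-id=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b"

    set parts [split $field ,]
    set addr  [lindex $parts 0]                      ;# "127.0.0.1:30001@40001"
    set shard [lindex [split [lindex $parts 2] =] 1] ;# the 40-char shard id

    puts "addr=$addr shard-id=$shard"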
diff --git a/tests/cluster/tests/28-cluster-shards.tcl b/tests/cluster/tests/28-cluster-shards.tcl
index 8d218cb6f..726c460c3 100644
--- a/tests/cluster/tests/28-cluster-shards.tcl
+++ b/tests/cluster/tests/28-cluster-shards.tcl
@@ -39,6 +39,17 @@ proc get_node_info_from_shard {id reference {type node}} {
     return {}
 }
 
+proc cluster_ensure_master {id} {
+    if { [regexp "master" [R $id role]] == 0 } {
+        assert_equal {OK} [R $id CLUSTER FAILOVER]
+        wait_for_condition 50 100 {
+            [regexp "master" [R $id role]] == 1
+        } else {
+            fail "instance $id is not master"
+        }
+    }
+}
+
 test "Create a 8 nodes cluster with 4 shards" {
     cluster_create_with_split_slots 4 4
 }
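The cluster_ensure_master helper added above issues CLUSTER FAILOVER and then polls [regexp "master" [R $id role]] for up to 50 attempts, 100 ms apart (roughly 5 seconds). A hypothetical usage sketch; the instance index is illustrative:

    # Promote instance 4 if it is currently a replica; a no-op when it
    # is already a primary. Fails the test if promotion never happens.
    cluster_ensure_master 4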
@@ -87,7 +98,7 @@ test "Verify information about the shards" {
                 # Replica could be in online or loading
             }
         }
-    }
+    }
 }
 
 test "Verify no slot shard" {
@@ -180,7 +191,7 @@ test "Test the replica reports a loading state while it's loading" {
         fail "Replica never transitioned to online"
     }
 
-    # Final sanity, the replica agrees it is online.
+    # Final sanity, the replica agrees it is online.
     assert_equal "online" [dict get [get_node_info_from_shard $replica_cluster_id $replica_id "node"] health]
 }
 
@@ -200,3 +211,93 @@ test "Regression test for a crash when calling SHARDS during handshake" {
 test "Cluster is up" {
     assert_cluster_state ok
 }
+test "Shard ids are unique" {
+    set shard_ids {}
+    for {set i 0} {$i < 4} {incr i} {
+        set shard_id [R $i cluster myshardid]
+        assert_equal [dict exists $shard_ids $shard_id] 0
+        dict set shard_ids $shard_id 1
+    }
+}
+
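The test above uses a Tcl dict keyed by shard ID as a set; an equivalent check (a sketch, not part of the commit) compares the count of distinct IDs:

    # Collect the shard id of each of the four primaries and verify
    # that no two primaries share a shard.
    set ids {}
    for {set i 0} {$i < 4} {incr i} {
        lappend ids [R $i cluster myshardid]
    }
    assert_equal 4 [llength [lsort -unique $ids]]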
+test "CLUSTER MYSHARDID reports same id for both primary and replica" {
+    for {set i 0} {$i < 4} {incr i} {
+        assert_equal [R $i cluster myshardid] [R [expr $i+4] cluster myshardid]
+        assert_equal [string length [R $i cluster myshardid]] 40
+    }
+}
+
+test "CLUSTER NODES reports correct shard id" {
+    for {set i 0} {$i < 8} {incr i} {
+        set nodes [get_cluster_nodes $i]
+        set node_id_to_shardid_mapping {}
+        foreach n $nodes {
+            set node_shard_id [dict get $n shard-id]
+            set node_id [dict get $n id]
+            assert_equal [string length $node_shard_id] 40
+            if {[dict exists $node_id_to_shardid_mapping $node_id]} {
+                assert_equal [dict get $node_id_to_shardid_mapping $node_id] $node_shard_id
+            } else {
+                dict set node_id_to_shardid_mapping $node_id $node_shard_id
+            }
+            if {[lindex [dict get $n flags] 0] eq "myself"} {
+                assert_equal [R $i cluster myshardid] [dict get $n shard-id]
+            }
+        }
+    }
+}
+
+test "New replica receives primary's shard id" {
+    # Find a primary
+    set id 0
+    for {} {$id < 8} {incr id} {
+        if {[regexp "master" [R $id role]]} {
+            break
+        }
+    }
+    assert_not_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+    assert_equal {OK} [R 8 cluster replicate [R $id cluster myid]]
+    assert_equal [R 8 cluster myshardid] [R $id cluster myshardid]
+}
+
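Because the shard ID propagates via a PING extension, every node's view should converge on the same shard-id for the new replica; a hedged cross-check sketch reusing get_cluster_nodes from cluster.tcl (gossip propagation delay is ignored for brevity):

    set replica_node_id [R 8 cluster myid]
    set replica_shard   [R 8 cluster myshardid]
    for {set j 0} {$j < 8} {incr j} {
        foreach n [get_cluster_nodes $j] {
            if {[dict get $n id] eq $replica_node_id} {
                # Each peer's view of the new replica should carry the
                # shard id the replica reports for itself.
                assert_equal $replica_shard [dict get $n shard-id]
            }
        }
    }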
+test "CLUSTER MYSHARDID reports same shard id after shard restart" {
+    set node_ids {}
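+    # Stepping i by 4 visits nodes 0 and 4 only: a primary and its
+    # replica, i.e. one whole shard goes down and comes back.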
+    for {set i 0} {$i < 8} {incr i 4} {
+        dict set node_ids $i [R $i cluster myshardid]
+        kill_instance redis $i
+        wait_for_condition 50 100 {
+            [instance_is_killed redis $i]
+        } else {
+            fail "instance $i is not killed"
+        }
+    }
+    for {set i 0} {$i < 8} {incr i 4} {
+        restart_instance redis $i
+    }
+    assert_cluster_state ok
+    for {set i 0} {$i < 8} {incr i 4} {
+        assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+    }
+}
+
+test "CLUSTER MYSHARDID reports same shard id after cluster restart" {
+    set node_ids {}
+    for {set i 0} {$i < 8} {incr i} {
+        dict set node_ids $i [R $i cluster myshardid]
+    }
+    for {set i 0} {$i < 8} {incr i} {
+        kill_instance redis $i
+        wait_for_condition 50 100 {
+            [instance_is_killed redis $i]
+        } else {
+            fail "instance $i is not killed"
+        }
+    }
+    for {set i 0} {$i < 8} {incr i} {
+        restart_instance redis $i
+    }
+    assert_cluster_state ok
+    for {set i 0} {$i < 8} {incr i} {
+        assert_equal [dict get $node_ids $i] [R $i cluster myshardid]
+    }
+}