summaryrefslogtreecommitdiff
path: root/src/cluster.c
diff options
context:
space:
mode:
authorantirez <antirez@gmail.com>2014-05-14 10:46:37 +0200
committerantirez <antirez@gmail.com>2014-05-14 10:46:37 +0200
commit6baac558d87093bcc64fe053540f059664a0a429 (patch)
tree2b8399eb224bd3d5fd08df190621e6825d69e7bf /src/cluster.c
parent27ca133d35323346714c0af5a650d3a8c0238099 (diff)
downloadredis-6baac558d87093bcc64fe053540f059664a0a429.tar.gz
Cluster: better handling of stolen slots.
The previous code handling a lost slot (a slot claimed by another master with a higher configuration epoch for that slot) was defensive: it considered this an error and put the cluster in an odd state that required running 'redis-trib fix'. This was changed because, in practice, this only happens either legitimately, as a result of failovers, or when the admin has modified the configuration in order to reconfigure the cluster. The new code instead tries to make sure that the stored keys match the new slots map, by removing all the keys in the slots we lost ownership of. The function that deletes the keys from the lost slots is called only if the node does not lose all of its slots (losing all of them results in a reconfiguration as a slave of the node that got ownership). This is an optimization, since in that case the replication code will flush all the instance data anyway, and in a faster way.
Diffstat (limited to 'src/cluster.c')
-rw-r--r--src/cluster.c38
1 files changed, 23 insertions, 15 deletions
diff --git a/src/cluster.c b/src/cluster.c
index 81eff9442..046a36fc9 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1212,6 +1212,15 @@ void clusterSetNodeAsMaster(clusterNode *n) {
void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
int j;
clusterNode *curmaster, *newmaster = NULL;
+ /* The dirty slots list is a list of slots for which we lost ownership
+ * while still having keys inside. This usually happens after a failover
+ * or after a manual cluster reconfiguration operated by the admin.
+ *
+ * If the update message does not demote this master to a slave (in which
+ * case we would resync with the master, replacing the whole key space),
+ * we need to delete all the keys in the slots we lost ownership of. */
+ uint16_t dirty_slots[REDIS_CLUSTER_SLOTS];
+ int dirty_slots_count = 0;
/* Here we set curmaster to this node or the node this node
* replicates to if it's a slave. In the for loop we are
@@ -1241,25 +1250,14 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
if (server.cluster->slots[j] == NULL ||
server.cluster->slots[j]->configEpoch < senderConfigEpoch)
{
- /* Was this slot mine, and still contains keys? Something
- * odd happened, put the slot in importing state so that
- * redis-trib fix can detect the condition (and no further
- * updates will be processed before the slot gets fixed). */
+ /* Was this slot mine, and still contains keys? Mark it as
+ * a dirty slot. */
if (server.cluster->slots[j] == myself &&
countKeysInSlot(j) &&
sender != myself)
{
- redisLog(REDIS_WARNING,
- "I received an update for slot %d. "
- "%.40s claims it with config %llu, "
- "I've it assigned to myself with config %llu. "
- "I've still keys about this slot! "
- "Putting the slot in IMPORTING state. "
- "Please run the 'redis-trib fix' command.",
- j, sender->name,
- (unsigned long long) senderConfigEpoch,
- (unsigned long long) myself->configEpoch);
- server.cluster->importing_slots_from[j] = sender;
+ dirty_slots[dirty_slots_count] = j;
+ dirty_slots_count++;
}
if (server.cluster->slots[j] == curmaster)
@@ -1288,6 +1286,16 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE|
CLUSTER_TODO_FSYNC_CONFIG);
+ } else if (dirty_slots_count) {
+ /* If we are here, we received an update message which removed
+ * ownership of certain slots we still have keys for, but we are
+ * still serving some slots, so this master node was not demoted to
+ * a slave.
+ *
+ * In order to maintain a consistent state between keys and slots
+ * we need to remove all the keys from the slots we lost. */
+ for (j = 0; j < dirty_slots_count; j++)
+ delKeysInSlot(dirty_slots[j]);
}
}