summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author antirez <antirez@gmail.com> 2013-04-30 12:24:46 +0200
committer antirez <antirez@gmail.com> 2013-04-30 15:08:29 +0200
commit e5ef85c4441b8cdfdbd39dc2626376311f36e4ec (patch)
tree 474a1d0a96ce673dc67ab17102c33192bc18be0f
parent ef05a78e7ee8dd179022157d09072d92ae966c55 (diff)
download redis-e5ef85c4441b8cdfdbd39dc2626376311f36e4ec.tar.gz
Sentinel: changes to tilt mode.
Tilt mode was too aggressive (not processing INFO output), which resulted in a few problems: 1) Redirections were not followed when in tilt mode. This opened a window in which clients could be misinformed about the current master when a Sentinel was in tilt mode and a failover happened while it was not able to update its state. 2) It was possible for a Sentinel exiting tilt mode to detect a false failover start if a slave rebooted with a wrong configuration at about the same time. This used to happen because in tilt mode we lost the information that the runid changed (reboot). Now the Sentinel in tilt mode will still remove the instance from the list of slaves if it changes state AND runid at the same time. Both are edge conditions, but the changes should overall improve the reliability of Sentinel.
-rw-r--r-- src/sentinel.c 41
1 files changed, 26 insertions, 15 deletions
diff --git a/src/sentinel.c b/src/sentinel.c
index 2711ca304..a4db9408e 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -1461,10 +1461,13 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->info_refresh = mstime();
sdsfreesplitres(lines,numlines);
- /* ---------------------------- Acting half ----------------------------- */
- if (sentinel.tilt) return;
+ /* ---------------------------- Acting half -----------------------------
+ * Some things will not happen if sentinel.tilt is true, but some will
+ * still be processed. */
- /* Act if a master turned into a slave. */
+ /* When what we believe is our master, turned into a slave, the wiser
+ * thing we can do is to follow the events and redirect to the new
+ * master, always. */
if ((ri->flags & SRI_MASTER) && role == SRI_SLAVE && ri->slave_master_host)
{
sentinelEvent(REDIS_WARNING,"+redirect-to-master",ri,
@@ -1473,12 +1476,12 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
ri->slave_master_host, ri->slave_master_port);
sentinelResetMasterAndChangeAddress(ri,ri->slave_master_host,
ri->slave_master_port);
- return;
+ return; /* Don't process anything after this event. */
}
- /* Act if a slave turned into a master. */
+ /* Handle slave -> master role switch. */
if ((ri->flags & SRI_SLAVE) && role == SRI_MASTER) {
- if (ri->flags & SRI_DEMOTE) {
+ if (!sentinel.tilt && ri->flags & SRI_DEMOTE) {
/* If this sentinel was partitioned from the slave's master,
* or tilted recently, wait some time before to act,
* so that DOWN and roles INFO will be refreshed. */
@@ -1513,22 +1516,25 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
sentinelEvent(REDIS_NOTICE,"-demote-flag-cleared",ri,"%@");
}
} else if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
- (runid_changed || first_runid))
+ (runid_changed || first_runid))
{
/* If a slave turned into master but:
*
* 1) Failover not in progress.
- * 2) RunID hs changed, or its the first time we see an INFO output.
+ * 2) RunID has changed or its the first time we see an INFO output.
*
* We assume this is a reboot with a wrong configuration.
- * Log the event and remove the slave. */
+ * Log the event and remove the slave. Note that this is processed
+ * in tilt mode as well, otherwise we lose the information that the
+ * runid changed (reboot?) and when the tilt mode ends a fake
+ * failover will be detected. */
int retval;
sentinelEvent(REDIS_WARNING,"-slave-restart-as-master",ri,"%@ #removing it from the attached slaves");
retval = dictDelete(ri->master->slaves,ri->name);
redisAssert(retval == REDIS_OK);
return;
- } else if (ri->flags & SRI_PROMOTED) {
+ } else if (!sentinel.tilt && ri->flags & SRI_PROMOTED) {
/* If this is a promoted slave we can change state to the
* failover state machine. */
if ((ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
@@ -1544,11 +1550,12 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
sentinelCallClientReconfScript(ri->master,SENTINEL_LEADER,
"start",ri->master->addr,ri->addr);
}
- } else if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) ||
- ((ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
- (ri->master->flags & SRI_I_AM_THE_LEADER) &&
- ri->master->failover_state ==
- SENTINEL_FAILOVER_STATE_WAIT_START))
+ } else if (!sentinel.tilt && (
+ !(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) ||
+ ((ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
+ (ri->master->flags & SRI_I_AM_THE_LEADER) &&
+ ri->master->failover_state ==
+ SENTINEL_FAILOVER_STATE_WAIT_START)))
{
/* No failover in progress? Then it is the start of a failover
* and we are an observer.
@@ -1580,6 +1587,10 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
}
}
+ /* None of the following conditions are processed when in tilt mode, so
+ * return asap. */
+ if (sentinel.tilt) return;
+
/* Detect if the slave that is in the process of being reconfigured
* changed state. */
if ((ri->flags & SRI_SLAVE) && role == SRI_SLAVE &&