summary | refs | log | tree | commit | diff
path: root/src/sentinel.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/sentinel.c')
-rw-r--r-- src/sentinel.c 58
1 files changed, 37 insertions, 21 deletions
diff --git a/src/sentinel.c b/src/sentinel.c
index 973f35556..bc5935b02 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -139,10 +139,15 @@ typedef struct instanceLink {
mstime_t pc_last_activity; /* Last time we received any message. */
mstime_t last_avail_time; /* Last time the instance replied to ping with
a reply we consider valid. */
- mstime_t last_ping_time; /* Last time a pending ping was sent in the
- context of the current command connection
- with the instance. 0 if still not sent or
- if pong already received. */
+ mstime_t act_ping_time; /* Time at which the last pending ping (no pong
+ received after it) was sent. This field is
+ set to 0 when a pong is received, and set again
+ to the current time if the value is 0 and a new
+ ping is sent. */
+ mstime_t last_ping_time; /* Time at which we sent the last ping. This is
+ only used to avoid sending too many pings
+ during failure. Idle time is computed using
+ the act_ping_time field. */
mstime_t last_pong_time; /* Last time the instance replied to ping,
whatever the reply was. That's used to check
if the link is idle and must be reconnected. */
@@ -925,11 +930,12 @@ instanceLink *createInstanceLink(void) {
link->pc_conn_time = 0;
link->last_reconn_time = 0;
link->pc_last_activity = 0;
- /* We set the last_ping_time to "now" even if we actually don't have yet
+ /* We set the act_ping_time to "now" even if we actually don't have yet
* a connection with the node, nor we sent a ping.
* This is useful to detect a timeout in case we'll not be able to connect
* with the node at all. */
- link->last_ping_time = mstime();
+ link->act_ping_time = mstime();
+ link->last_ping_time = 0;
link->last_avail_time = mstime();
link->last_pong_time = mstime();
return link;
@@ -1344,7 +1350,8 @@ void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
sdsfree(ri->slave_master_host);
ri->runid = NULL;
ri->slave_master_host = NULL;
- ri->link->last_ping_time = mstime();
+ ri->link->act_ping_time = mstime();
+ ri->link->last_ping_time = 0;
ri->link->last_avail_time = mstime();
ri->link->last_pong_time = mstime();
ri->role_reported_time = mstime();
@@ -2199,7 +2206,7 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
strncmp(r->str,"MASTERDOWN",10) == 0)
{
link->last_avail_time = mstime();
- link->last_ping_time = 0; /* Flag the pong as received. */
+ link->act_ping_time = 0; /* Flag the pong as received. */
} else {
/* Send a SCRIPT KILL command if the instance appears to be
* down because of a busy script. */
@@ -2440,20 +2447,31 @@ int sentinelForceHelloUpdateForMaster(sentinelRedisInstance *master) {
return REDIS_OK;
}
-/* Send a PING to the specified instance and refresh the last_ping_time
+/* Send a PING to the specified instance and refresh the act_ping_time
* if it is zero (that is, if we received a pong for the previous ping).
*
* On error zero is returned, and we can't consider the PING command
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
+ static unsigned long long counters[256];
+ static time_t last;
+ // printf("(%lld) PING %s\n", mstime(), sentinelGetInstanceTypeString(ri));
+ counters[ri->flags & (SRI_SLAVE|SRI_MASTER|SRI_SENTINEL)]++;
+ if (time(NULL)-last >= 5) {
+ printf("slave: %llu master: %llu sentinel: %llu\n",
+ counters[SRI_SLAVE], counters[SRI_MASTER], counters[SRI_SENTINEL]);
+ last = time(NULL);
+ }
int retval = redisAsyncCommand(ri->link->cc,
sentinelPingReplyCallback, ri, "PING");
if (retval == REDIS_OK) {
ri->link->pending_commands++;
- /* We update the ping time only if we received the pong for
- * the previous ping, otherwise we are technically waiting
- * since the first ping that did not received a reply. */
- if (ri->link->last_ping_time == 0) ri->link->last_ping_time = mstime();
+ ri->link->last_ping_time = mstime();
+ /* We update the active ping time only if we received the pong for
+ * the previous ping, otherwise we are technically waiting since the
+ * first ping that did not receive a reply. */
+ if (ri->link->act_ping_time == 0)
+ ri->link->act_ping_time = ri->link->last_ping_time;
return 1;
} else {
return 0;
@@ -2506,9 +2524,7 @@ void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
sentinelInfoReplyCallback, ri, "INFO");
if (retval == REDIS_OK) ri->link->pending_commands++;
} else if ((now - ri->link->last_pong_time) > ping_period &&
- (ri->link->last_ping_time == 0 ||
- now - ri->link->last_ping_time > ping_period*2))
- {
+ (now - ri->link->last_ping_time) > ping_period/2) {
/* Send PING to all the three kinds of instances. */
sentinelSendPing(ri);
} else if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
@@ -2592,7 +2608,7 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
addReplyBulkCString(c,"last-ping-sent");
addReplyBulkLongLong(c,
- ri->link->last_ping_time ? (mstime() - ri->link->last_ping_time) : 0);
+ ri->link->act_ping_time ? (mstime() - ri->link->act_ping_time) : 0);
fields++;
addReplyBulkCString(c,"last-ok-ping-reply");
@@ -3202,8 +3218,8 @@ void sentinelPublishCommand(redisClient *c) {
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = 0;
- if (ri->link->last_ping_time)
- elapsed = mstime() - ri->link->last_ping_time;
+ if (ri->link->act_ping_time)
+ elapsed = mstime() - ri->link->act_ping_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
@@ -3214,10 +3230,10 @@ void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
if (ri->link->cc &&
(mstime() - ri->link->cc_conn_time) >
SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
- ri->link->last_ping_time != 0 && /* Ther is a pending ping... */
+ ri->link->act_ping_time != 0 && /* There is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
- (mstime() - ri->link->last_ping_time) > (ri->down_after_period/2) &&
+ (mstime() - ri->link->act_ping_time) > (ri->down_after_period/2) &&
(mstime() - ri->link->last_pong_time) > (ri->down_after_period/2))
{
instanceLinkCloseConnection(ri->link,ri->link->cc);