summaryrefslogtreecommitdiff
path: root/src/sentinel.c
diff options
context:
space:
mode:
authorMoti Cohen <moti.cohen@redis.com>2022-01-29 21:00:29 +0200
committerGitHub <noreply@github.com>2022-01-29 21:00:29 +0200
commit79f089bdd92348d5fa3965258f539752f2eb8e78 (patch)
treef230ae05457b8e7a32d9dd4f609f20f5201c93f2 /src/sentinel.c
parent2d391e62bd5c80d75f50754bd5f05ef15a73fab7 (diff)
downloadredis-79f089bdd92348d5fa3965258f539752f2eb8e78.tar.gz
Fixed Sentinel support for hostnames (#10146)
Sentinel tries to resolve instances hostname to IP only during registration. It might be that the instance is unavailable during that time, such as leader crashed and failover took place. Yet, promoted replica must support: - Register leader, even if it fails to resolve its hostname during failover - Try later to resolve it, if instance is disconnected. Note that this condition also support ip-change of an instance.
Diffstat (limited to 'src/sentinel.c')
-rw-r--r--src/sentinel.c54
1 files changed, 43 insertions, 11 deletions
diff --git a/src/sentinel.c b/src/sentinel.c
index dbc3e9876..f65e29876 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -552,10 +552,10 @@ void sentinelIsRunning(void) {
/* Create a sentinelAddr object and return it on success.
* On error NULL is returned and errno is set to:
- * ENOENT: Can't resolve the hostname.
+ * ENOENT: Can't resolve the hostname, unless accept_unresolved is non-zero.
* EINVAL: Invalid port number.
*/
-sentinelAddr *createSentinelAddr(char *hostname, int port) {
+sentinelAddr *createSentinelAddr(char *hostname, int port, int is_accept_unresolved) {
char ip[NET_IP_STR_LEN];
sentinelAddr *sa;
@@ -565,8 +565,14 @@ sentinelAddr *createSentinelAddr(char *hostname, int port) {
}
if (anetResolve(NULL,hostname,ip,sizeof(ip),
sentinel.resolve_hostnames ? ANET_NONE : ANET_IP_ONLY) == ANET_ERR) {
- errno = ENOENT;
- return NULL;
+ serverLog(LL_WARNING, "Failed to resolve hostname '%s'", hostname);
+ if (sentinel.resolve_hostnames && is_accept_unresolved) {
+ ip[0] = '\0';
+ }
+ else {
+ errno = ENOENT;
+ return NULL;
+ }
}
sa = zmalloc(sizeof(*sa));
sa->hostname = sdsnew(hostname);
@@ -598,6 +604,15 @@ int sentinelAddrIsEqual(sentinelAddr *a, sentinelAddr *b) {
return a->port == b->port && !strcasecmp(a->ip,b->ip);
}
+/* Return non-zero if the two addresses are equal, either by address
+ * or by hostname if they could not have been resolved.
+ */
+int sentinelAddrOrHostnameEqual(sentinelAddr *a, sentinelAddr *b) {
+ return a->port == b->port &&
+ (!strcmp(a->ip, b->ip) ||
+ !strcasecmp(a->hostname, b->hostname));
+}
+
/* Return non-zero if a hostname matches an address. */
int sentinelAddrEqualsHostname(sentinelAddr *a, char *hostname) {
char ip[NET_IP_STR_LEN];
@@ -1259,7 +1274,7 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
serverAssert((flags & SRI_MASTER) || master != NULL);
/* Check address validity. */
- addr = createSentinelAddr(hostname,port);
+ addr = createSentinelAddr(hostname,port,1);
if (addr == NULL) return NULL;
/* For slaves use ip/host:port as name. */
@@ -1387,7 +1402,7 @@ sentinelRedisInstance *sentinelRedisInstanceLookupSlave(
* If that is the case, depending on configuration we either resolve
* it and use the IP address or fail.
*/
- addr = createSentinelAddr(slave_addr, port);
+ addr = createSentinelAddr(slave_addr, port, 0);
if (!addr) return NULL;
key = announceSentinelAddrAndPort(addr);
releaseSentinelAddr(addr);
@@ -1450,8 +1465,10 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
serverAssert(addr || runid); /* User must pass at least one search param. */
if (addr != NULL) {
- /* Resolve addr, we use the IP as a key even if a hostname is used */
- ri_addr = createSentinelAddr(addr, port);
+ /* Try to resolve addr. If hostnames are used, we're accepting an ri_addr
+ * that contains an hostname only and can still be matched based on that.
+ */
+ ri_addr = createSentinelAddr(addr,port,1);
if (!ri_addr) return NULL;
}
di = dictGetIterator(instances);
@@ -1460,8 +1477,7 @@ sentinelRedisInstance *getSentinelRedisInstanceByAddrAndRunID(dict *instances, c
if (runid && !ri->runid) continue;
if ((runid == NULL || strcmp(ri->runid, runid) == 0) &&
- (addr == NULL || (strcmp(ri->addr->ip, ri_addr->ip) == 0 &&
- ri->addr->port == port)))
+ (addr == NULL || sentinelAddrOrHostnameEqual(ri->addr, ri_addr)))
{
instance = ri;
break;
@@ -1565,7 +1581,7 @@ int sentinelResetMasterAndChangeAddress(sentinelRedisInstance *master, char *hos
dictIterator *di;
dictEntry *de;
- newaddr = createSentinelAddr(hostname,port);
+ newaddr = createSentinelAddr(hostname,port,0);
if (newaddr == NULL) return C_ERR;
/* There can be only 0 or 1 slave that has the newaddr.
@@ -2355,6 +2371,7 @@ static int instanceLinkNegotiateTLS(redisAsyncContext *context) {
* is disconnected. Note that link->disconnected is true even if just
* one of the two links (commands and pub/sub) is missing. */
void sentinelReconnectInstance(sentinelRedisInstance *ri) {
+
if (ri->link->disconnected == 0) return;
if (ri->addr->port == 0) return; /* port == 0 means invalid address. */
instanceLink *link = ri->link;
@@ -2365,7 +2382,21 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
/* Commands connection. */
if (link->cc == NULL) {
+
+ /* It might be that the instance is disconnected because it wasn't available earlier when the instance
+ * allocated, say during failover, and therefore we failed to resolve its ip.
+ * Another scenario is that the instance restarted with new ip, and we should resolve its new ip based on
+ * its hostname */
+ if (sentinel.resolve_hostnames) {
+ sentinelAddr *tryResolveAddr = createSentinelAddr(ri->addr->hostname, ri->addr->port, 0);
+ if (tryResolveAddr != NULL) {
+ releaseSentinelAddr(ri->addr);
+ ri->addr = tryResolveAddr;
+ }
+ }
+
link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,server.bind_source_addr);
+
if (link->cc && !link->cc->err) anetCloexec(link->cc->c.fd);
if (!link->cc) {
sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to establish connection");
@@ -4892,6 +4923,7 @@ sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
max_master_down_time += master->down_after_period * 10;
di = dictGetIterator(master->slaves);
+
while((de = dictNext(di)) != NULL) {
sentinelRedisInstance *slave = dictGetVal(de);
mstime_t info_validity_time;