summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Salvatore Sanfilippo <antirez@gmail.com> 2019-10-16 17:42:35 +0200
committer GitHub <noreply@github.com> 2019-10-16 17:42:35 +0200
commit efb6495a446a92328512f8a66db701dab95fb933 (patch)
tree ea0c8e5124037fdb84263555e31702233811aaf7
parent b8e02f2b4005febbdaa11ff978c4f98b664464c9 (diff)
parent 8e29b0b22b02cd5d5e9a5f51e7c60fa745254578 (diff)
download redis-efb6495a446a92328512f8a66db701dab95fb933.tar.gz
Merge pull request #6236 from yossigo/poc/conns
Abstract Connections I/O API & TLS Support
-rw-r--r-- TLS.md 106
-rw-r--r-- deps/Makefile 6
-rw-r--r-- redis.conf 70
-rw-r--r-- src/Makefile 10
-rw-r--r-- src/ae.c 14
-rw-r--r-- src/ae.h 2
-rw-r--r-- src/ae_epoll.c 4
-rw-r--r-- src/anet.c 22
-rw-r--r-- src/anet.h 12
-rw-r--r-- src/aof.c 8
-rw-r--r-- src/cluster.c 235
-rw-r--r-- src/cluster.h 2
-rw-r--r-- src/config.c 180
-rw-r--r-- src/connection.c 407
-rw-r--r-- src/connection.h 220
-rw-r--r-- src/connhelpers.h 85
-rw-r--r-- src/debug.c 11
-rw-r--r-- src/module.c 8
-rw-r--r-- src/networking.c 264
-rw-r--r-- src/rdb.c 193
-rw-r--r-- src/redis-cli.c 132
-rw-r--r-- src/replication.c 359
-rw-r--r-- src/rio.c 236
-rw-r--r-- src/rio.h 23
-rw-r--r-- src/scripting.c 22
-rw-r--r-- src/sentinel.c 37
-rw-r--r-- src/server.c 150
-rw-r--r-- src/server.h 59
-rw-r--r-- src/tls.c 808
-rw-r--r-- tests/cluster/run.tcl 1
-rw-r--r-- tests/cluster/tests/04-resharding.tcl 2
-rw-r--r-- tests/cluster/tests/12-replica-migration-2.tcl 4
-rw-r--r-- tests/helpers/bg_block_op.tcl 8
-rw-r--r-- tests/helpers/bg_complex_data.tcl 8
-rw-r--r-- tests/helpers/gen_write_load.tcl 8
-rw-r--r-- tests/instances.tcl 26
-rw-r--r-- tests/integration/aof-race.tcl 7
-rw-r--r-- tests/integration/aof.tcl 43
-rw-r--r-- tests/integration/block-repl.tcl 10
-rw-r--r-- tests/integration/psync2-reg.tcl 3
-rw-r--r-- tests/integration/redis-cli.tcl 9
-rw-r--r-- tests/integration/replication.tcl 164
-rw-r--r-- tests/sentinel/run.tcl 1
-rw-r--r-- tests/sentinel/tests/07-down-conditions.tcl 3
-rw-r--r-- tests/support/cli.tcl 19
-rw-r--r-- tests/support/cluster.tcl 4
-rw-r--r-- tests/support/redis.tcl 21
-rw-r--r-- tests/support/server.tcl 32
-rw-r--r-- tests/support/util.tcl 4
-rw-r--r-- tests/test_helper.tcl 23
-rw-r--r-- tests/unit/limits.tcl 7
-rw-r--r-- tests/unit/other.tcl 6
-rw-r--r-- tests/unit/protocol.tcl 6
-rw-r--r-- tests/unit/tls.tcl 105
-rw-r--r-- tests/unit/wait.tcl 5
-rwxr-xr-x utils/gen-test-certs.sh 23
56 files changed, 3452 insertions, 785 deletions
diff --git a/TLS.md b/TLS.md
new file mode 100644
index 000000000..76fe0be2e
--- /dev/null
+++ b/TLS.md
@@ -0,0 +1,106 @@
+TLS Support -- Work In Progress
+===============================
+
+This is a brief note to capture current thoughts/ideas and track pending action
+items.
+
+Getting Started
+---------------
+
+### Building
+
+To build with TLS support you'll need OpenSSL development libraries (e.g.
+libssl-dev on Debian/Ubuntu).
+
+Run `make BUILD_TLS=yes`.
+
+### Tests
+
+To run the Redis test suite with TLS, you'll need TLS support for TCL (e.g. the
+`tcl-tls` package on Debian/Ubuntu).
+
+1. Run `./utils/gen-test-certs.sh` to generate a root CA and a server
+ certificate.
+
+2. Run `./runtest --tls` or `./runtest-cluster --tls` to run Redis and Redis
+ Cluster tests in TLS mode.
+
+### Running manually
+
+To manually run a Redis server in TLS mode (assuming `gen-test-certs.sh` was
+invoked so sample certificates/keys are available):
+
+ ./src/redis-server --tls-port 6379 --port 0 \
+ --tls-cert-file ./tests/tls/redis.crt \
+ --tls-key-file ./tests/tls/redis.key \
+ --tls-ca-cert-file ./tests/tls/ca.crt
+
+To connect to this Redis server with `redis-cli`:
+
+ ./src/redis-cli --tls \
+ --cert ./tests/tls/redis.crt \
+ --key ./tests/tls/redis.key \
+ --cacert ./tests/tls/ca.crt
+
+The server command above disables TCP and enables TLS on port 6379. It's also
+possible to have both TCP and TLS available by assigning them different ports.
+
+To make a replica connect to the master using TLS, use `--tls-replication yes`,
+and to make Redis Cluster use TLS across nodes use `--tls-cluster yes`.
+
+Connections
+-----------
+
+All socket operations now go through a connection abstraction layer that hides
+I/O and read/write event handling from the caller.
+
+**Multi-threaded I/O is not currently supported for TLS**, as a TLS connection
+needs to do its own manipulation of AE events which is not thread safe. The
+solution is probably to manage independent AE loops for I/O threads and longer
+term association of connections with threads. This may potentially improve
+overall performance as well.
+
+Sync IO for TLS is currently implemented in a hackish way, i.e. making the
+socket blocking and configuring socket-level timeout. This means the timeout
+value may not be so accurate, and there would be a lot of syscall overhead.
+However I believe that getting rid of syncio completely in favor of pure async
+work is probably a better move than trying to fix that. For replication it would
+probably not be so hard. For cluster keys migration it might be more difficult,
+but there are probably other good reasons to improve that part anyway.
+
+To-Do List
+==========
+
+Additional TLS Features
+-----------------------
+
+1. Add metrics to INFO?
+2. Add session caching support. Check if/how it's handled by clients to assess
+ how useful/important it is.
+
+redis-benchmark
+---------------
+
+The current implementation is a mix of using hiredis for parsing and basic
+networking (establishing connections), but directly manipulating sockets for
+most actions.
+
+This will need to be cleaned up for proper TLS support. The best approach is
+probably to migrate to hiredis async mode.
+
+redis-cli
+---------
+
+1. Add support for TLS in --slave and --rdb modes.
+
+Others
+------
+
+Consider the implications of allowing TLS to be configured on a separate port,
+making Redis listen on multiple ports.
+
+This impacts many things, such as:
+1. Startup banner port notification
+2. Proctitle
+3. How slaves announce themselves
+4. Cluster bus port calculation
diff --git a/deps/Makefile b/deps/Makefile
index eb35c1e1f..700867f3b 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -41,9 +41,13 @@ distclean:
.PHONY: distclean
+ifeq ($(BUILD_TLS),yes)
+ HIREDIS_MAKE_FLAGS = USE_SSL=1
+endif
+
hiredis: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
- cd hiredis && $(MAKE) static
+ cd hiredis && $(MAKE) static $(HIREDIS_MAKE_FLAGS)
.PHONY: hiredis
diff --git a/redis.conf b/redis.conf
index 50ba823ac..408426f15 100644
--- a/redis.conf
+++ b/redis.conf
@@ -129,6 +129,76 @@ timeout 0
# Redis default starting with Redis 3.2.1.
tcp-keepalive 300
+################################# TLS/SSL #####################################
+
+# By default, TLS/SSL is disabled. To enable it, the "tls-port" configuration
+# directive can be used to define TLS-listening ports. To enable TLS on the
+# default port, use:
+#
+# port 0
+# tls-port 6379
+
+# Configure an X.509 certificate and private key to use for authenticating the
+# server to connected clients, masters or cluster peers. These files should be
+# PEM formatted.
+#
+# tls-cert-file redis.crt
+# tls-key-file redis.key
+
+# Configure a DH parameters file to enable Diffie-Hellman (DH) key exchange:
+#
+# tls-dh-params-file redis.dh
+
+# Configure a CA certificate(s) bundle or directory to authenticate TLS/SSL
+# clients and peers. Redis requires an explicit configuration of at least one
+# of these, and will not implicitly use the system wide configuration.
+#
+# tls-ca-cert-file ca.crt
+# tls-ca-cert-dir /etc/ssl/certs
+
+# If TLS/SSL clients are required to authenticate using a client side
+# certificate, use this directive.
+#
+# Note: this applies to all incoming clients, including replicas.
+#
+# tls-auth-clients yes
+
+# If TLS/SSL should be used when connecting as a replica to a master, enable
+# this configuration directive:
+#
+# tls-replication yes
+
+# If TLS/SSL should be used for the Redis Cluster bus, enable this configuration
+# directive.
+#
+# NOTE: If TLS/SSL is enabled for Cluster Bus, mutual authentication is always
+# enforced.
+#
+# tls-cluster yes
+
+# Explicitly specify TLS versions to support. Allowed values are case insensitive
+# and include "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3" (OpenSSL >= 1.1.1) or
+# "default" which is currently >= TLSv1.1.
+#
+# tls-protocols TLSv1.2
+
+# Configure allowed ciphers. See the ciphers(1ssl) manpage for more information
+# about the syntax of this string.
+#
+# Note: this configuration applies only to <= TLSv1.2.
+#
+# tls-ciphers DEFAULT:!MEDIUM
+
+# Configure allowed TLSv1.3 ciphersuites. See the ciphers(1ssl) manpage for more
+# information about the syntax of this string, and specifically for TLSv1.3
+# ciphersuites.
+#
+# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256
+
+# When choosing a cipher, use the server's preference instead of the client's
+# preference. By default, the server follows the client's preference.
+#
+# tls-prefer-server-cipher yes
+
################################# GENERAL #####################################
# By default Redis does not run as a daemon. Use 'yes' if you need it.
diff --git a/src/Makefile b/src/Makefile
index a76adbf4f..9fc230f94 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -93,6 +93,8 @@ else
ifeq ($(uname_S),Darwin)
# Darwin
FINAL_LIBS+= -ldl
+ OPENSSL_CFLAGS=-I/usr/local/opt/openssl/include
+ OPENSSL_LDFLAGS=-L/usr/local/opt/openssl/lib
else
ifeq ($(uname_S),AIX)
# AIX
@@ -145,6 +147,12 @@ ifeq ($(MALLOC),jemalloc)
FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS)
endif
+ifeq ($(BUILD_TLS),yes)
+ FINAL_CFLAGS+=-DUSE_OPENSSL $(OPENSSL_CFLAGS)
+ FINAL_LDFLAGS+=$(OPENSSL_LDFLAGS)
+ FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a -lssl -lcrypto
+endif
+
REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS)
REDIS_LD=$(QUIET_LINK)$(CC) $(FINAL_LDFLAGS)
REDIS_INSTALL=$(QUIET_INSTALL)$(INSTALL)
@@ -164,7 +172,7 @@ endif
REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
-REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o sha256.o
+REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o
REDIS_BENCHMARK_NAME=redis-benchmark
diff --git a/src/ae.c b/src/ae.c
index 53629ef77..2c1dae512 100644
--- a/src/ae.c
+++ b/src/ae.c
@@ -76,6 +76,7 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
eventLoop->maxfd = -1;
eventLoop->beforesleep = NULL;
eventLoop->aftersleep = NULL;
+ eventLoop->flags = 0;
if (aeApiCreate(eventLoop) == -1) goto err;
/* Events with mask == AE_NONE are not set. So let's initialize the
* vector with it. */
@@ -97,6 +98,14 @@ int aeGetSetSize(aeEventLoop *eventLoop) {
return eventLoop->setsize;
}
+/* Set/clear AE_DONT_WAIT; while set, event processing polls with timeout 0. */
+void aeSetDontWait(aeEventLoop *eventLoop, int noWait) {
+ if (noWait)
+ eventLoop->flags |= AE_DONT_WAIT;
+ else
+ eventLoop->flags &= ~AE_DONT_WAIT;
+}
+
/* Resize the maximum set size of the event loop.
* If the requested set size is smaller than the current set size, but
* there is already a file descriptor in use that is >= the requested
@@ -406,6 +415,11 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
}
}
+ if (eventLoop->flags & AE_DONT_WAIT) {
+ tv.tv_sec = tv.tv_usec = 0;
+ tvp = &tv;
+ }
+
/* Call the multiplexing API, will return only on timeout or when
* some event fires. */
numevents = aeApiPoll(eventLoop, tvp);
diff --git a/src/ae.h b/src/ae.h
index 184fe3d1b..9acd72434 100644
--- a/src/ae.h
+++ b/src/ae.h
@@ -106,6 +106,7 @@ typedef struct aeEventLoop {
void *apidata; /* This is used for polling API specific data */
aeBeforeSleepProc *beforesleep;
aeBeforeSleepProc *aftersleep;
+ int flags;
} aeEventLoop;
/* Prototypes */
@@ -128,5 +129,6 @@ void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
+void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
#endif
diff --git a/src/ae_epoll.c b/src/ae_epoll.c
index 410aac70d..fa197297e 100644
--- a/src/ae_epoll.c
+++ b/src/ae_epoll.c
@@ -121,8 +121,8 @@ static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
if (e->events & EPOLLIN) mask |= AE_READABLE;
if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
- if (e->events & EPOLLERR) mask |= AE_WRITABLE;
- if (e->events & EPOLLHUP) mask |= AE_WRITABLE;
+ if (e->events & EPOLLERR) mask |= AE_WRITABLE|AE_READABLE;
+ if (e->events & EPOLLHUP) mask |= AE_WRITABLE|AE_READABLE;
eventLoop->fired[j].fd = e->data.fd;
eventLoop->fired[j].mask = mask;
}
diff --git a/src/anet.c b/src/anet.c
index 2088f4fb1..46ea7e145 100644
--- a/src/anet.c
+++ b/src/anet.c
@@ -279,8 +279,8 @@ static int anetCreateSocket(char *err, int domain) {
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
-static int anetTcpGenericConnect(char *err, char *addr, int port,
- char *source_addr, int flags)
+static int anetTcpGenericConnect(char *err, const char *addr, int port,
+ const char *source_addr, int flags)
{
int s = ANET_ERR, rv;
char portstr[6]; /* strlen("65535") + 1; */
@@ -359,31 +359,31 @@ end:
}
}
-int anetTcpConnect(char *err, char *addr, int port)
+int anetTcpConnect(char *err, const char *addr, int port)
{
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONE);
}
-int anetTcpNonBlockConnect(char *err, char *addr, int port)
+int anetTcpNonBlockConnect(char *err, const char *addr, int port)
{
return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK);
}
-int anetTcpNonBlockBindConnect(char *err, char *addr, int port,
- char *source_addr)
+int anetTcpNonBlockBindConnect(char *err, const char *addr, int port,
+ const char *source_addr)
{
return anetTcpGenericConnect(err,addr,port,source_addr,
ANET_CONNECT_NONBLOCK);
}
-int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port,
- char *source_addr)
+int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
+ const char *source_addr)
{
return anetTcpGenericConnect(err,addr,port,source_addr,
ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING);
}
-int anetUnixGenericConnect(char *err, char *path, int flags)
+int anetUnixGenericConnect(char *err, const char *path, int flags)
{
int s;
struct sockaddr_un sa;
@@ -411,12 +411,12 @@ int anetUnixGenericConnect(char *err, char *path, int flags)
return s;
}
-int anetUnixConnect(char *err, char *path)
+int anetUnixConnect(char *err, const char *path)
{
return anetUnixGenericConnect(err,path,ANET_CONNECT_NONE);
}
-int anetUnixNonBlockConnect(char *err, char *path)
+int anetUnixNonBlockConnect(char *err, const char *path)
{
return anetUnixGenericConnect(err,path,ANET_CONNECT_NONBLOCK);
}
diff --git a/src/anet.h b/src/anet.h
index dd735240d..23f19643c 100644
--- a/src/anet.h
+++ b/src/anet.h
@@ -49,12 +49,12 @@
#undef ip_len
#endif
-int anetTcpConnect(char *err, char *addr, int port);
-int anetTcpNonBlockConnect(char *err, char *addr, int port);
-int anetTcpNonBlockBindConnect(char *err, char *addr, int port, char *source_addr);
-int anetTcpNonBlockBestEffortBindConnect(char *err, char *addr, int port, char *source_addr);
-int anetUnixConnect(char *err, char *path);
-int anetUnixNonBlockConnect(char *err, char *path);
+int anetTcpConnect(char *err, const char *addr, int port);
+int anetTcpNonBlockConnect(char *err, const char *addr, int port);
+int anetTcpNonBlockBindConnect(char *err, const char *addr, int port, const char *source_addr);
+int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port, const char *source_addr);
+int anetUnixConnect(char *err, const char *path);
+int anetUnixNonBlockConnect(char *err, const char *path);
int anetRead(int fd, char *buf, int count);
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len);
int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len);
diff --git a/src/aof.c b/src/aof.c
index 4e6af7c1c..32684eb89 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -385,6 +385,10 @@ void flushAppendOnlyFile(int force) {
* there is much to do about the whole server stopping for power problems
* or alike */
+ if (server.aof_flush_sleep && sdslen(server.aof_buf)) {
+ usleep(server.aof_flush_sleep);
+ }
+
latencyStartMonitor(latency);
nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));
latencyEndMonitor(latency);
@@ -652,7 +656,7 @@ struct client *createFakeClient(void) {
struct client *c = zmalloc(sizeof(*c));
selectDb(c,0);
- c->fd = -1;
+ c->conn = NULL;
c->name = NULL;
c->querybuf = sdsempty();
c->querybuf_peak = 0;
@@ -835,6 +839,8 @@ int loadAppendOnlyFile(char *filename) {
freeFakeClientArgv(fakeClient);
fakeClient->cmd = NULL;
if (server.aof_load_truncated) valid_up_to = ftello(fp);
+ if (server.key_load_delay)
+ usleep(server.key_load_delay);
}
/* This point can only be reached when EOF is reached without errors.
diff --git a/src/cluster.c b/src/cluster.c
index 93be2aa32..a7d8a02c3 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -49,7 +49,7 @@ clusterNode *myself = NULL;
clusterNode *createClusterNode(char *nodename, int flags);
int clusterAddNode(clusterNode *node);
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void clusterReadHandler(connection *conn);
void clusterSendPing(clusterLink *link, int type);
void clusterSendFail(char *nodename);
void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request);
@@ -477,7 +477,8 @@ void clusterInit(void) {
/* Port sanity check II
* The other handshake port check is triggered too late to stop
* us from trying to use a too-high cluster port number. */
- if (server.port > (65535-CLUSTER_PORT_INCR)) {
+ int port = server.tls_cluster ? server.tls_port : server.port;
+ if (port > (65535-CLUSTER_PORT_INCR)) {
serverLog(LL_WARNING, "Redis port number too high. "
"Cluster communication port is 10,000 port "
"numbers higher than your Redis port. "
@@ -485,8 +486,7 @@ void clusterInit(void) {
"lower than 55535.");
exit(1);
}
-
- if (listenToPort(server.port+CLUSTER_PORT_INCR,
+ if (listenToPort(port+CLUSTER_PORT_INCR,
server.cfd,&server.cfd_count) == C_ERR)
{
exit(1);
@@ -508,8 +508,8 @@ void clusterInit(void) {
/* Set myself->port / cport to my listening ports, we'll just need to
* discover the IP address via MEET messages. */
- myself->port = server.port;
- myself->cport = server.port+CLUSTER_PORT_INCR;
+ myself->port = port;
+ myself->cport = port+CLUSTER_PORT_INCR;
if (server.cluster_announce_port)
myself->port = server.cluster_announce_port;
if (server.cluster_announce_bus_port)
@@ -593,7 +593,7 @@ clusterLink *createClusterLink(clusterNode *node) {
link->sndbuf = sdsempty();
link->rcvbuf = sdsempty();
link->node = node;
- link->fd = -1;
+ link->conn = NULL;
return link;
}
@@ -601,23 +601,45 @@ clusterLink *createClusterLink(clusterNode *node) {
* This function will just make sure that the original node associated
* with this link will have the 'link' field set to NULL. */
void freeClusterLink(clusterLink *link) {
- if (link->fd != -1) {
- aeDeleteFileEvent(server.el, link->fd, AE_READABLE|AE_WRITABLE);
+ if (link->conn) {
+ connClose(link->conn);
+ link->conn = NULL;
}
sdsfree(link->sndbuf);
sdsfree(link->rcvbuf);
if (link->node)
link->node->link = NULL;
- close(link->fd);
zfree(link);
}
+static void clusterConnAcceptHandler(connection *conn) {
+ clusterLink *link;
+
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
+ serverLog(LL_VERBOSE,
+ "Error accepting cluster node connection: %s", connGetLastError(conn));
+ connClose(conn);
+ return;
+ }
+
+ /* Create a link object we use to handle the connection.
+ * It gets passed to the readable handler when data is available.
+ * Initially the link->node pointer is set to NULL as we don't know
+ * which node it is, but the right node is referenced once we know the
+ * node identity. */
+ link = createClusterLink(NULL);
+ link->conn = conn;
+ connSetPrivateData(conn, link);
+
+ /* Register read handler */
+ connSetReadHandler(conn, clusterReadHandler);
+}
+
#define MAX_CLUSTER_ACCEPTS_PER_CALL 1000
void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
int cport, cfd;
int max = MAX_CLUSTER_ACCEPTS_PER_CALL;
char cip[NET_IP_STR_LEN];
- clusterLink *link;
UNUSED(el);
UNUSED(mask);
UNUSED(privdata);
@@ -634,19 +656,24 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Error accepting cluster node: %s", server.neterr);
return;
}
- anetNonBlock(NULL,cfd);
- anetEnableTcpNoDelay(NULL,cfd);
+
+ connection *conn = server.tls_cluster ? connCreateAcceptedTLS(cfd,1) : connCreateAcceptedSocket(cfd);
+ connNonBlock(conn);
+ connEnableTcpNoDelay(conn);
/* Use non-blocking I/O for cluster messages. */
- serverLog(LL_VERBOSE,"Accepted cluster node %s:%d", cip, cport);
- /* Create a link object we use to handle the connection.
- * It gets passed to the readable handler when data is available.
- * Initiallly the link->node pointer is set to NULL as we don't know
- * which node is, but the right node is references once we know the
- * node identity. */
- link = createClusterLink(NULL);
- link->fd = cfd;
- aeCreateFileEvent(server.el,cfd,AE_READABLE,clusterReadHandler,link);
+ serverLog(LL_VERBOSE,"Accepting cluster node connection from %s:%d", cip, cport);
+
+ /* Accept the connection now. connAccept() may call our handler directly
+ * or schedule it for later depending on connection implementation.
+ */
+ if (connAccept(conn, clusterConnAcceptHandler) == C_ERR) {
+ serverLog(LL_VERBOSE,
+ "Error accepting cluster node connection: %s",
+ connGetLastError(conn));
+ connClose(conn);
+ return;
+ }
}
}
@@ -1447,7 +1474,7 @@ void nodeIp2String(char *buf, clusterLink *link, char *announced_ip) {
memcpy(buf,announced_ip,NET_IP_STR_LEN);
buf[NET_IP_STR_LEN-1] = '\0'; /* We are not sure the input is sane. */
} else {
- anetPeerToString(link->fd, buf, NET_IP_STR_LEN, NULL);
+ connPeerToString(link->conn, buf, NET_IP_STR_LEN, NULL);
}
}
@@ -1751,7 +1778,7 @@ int clusterProcessPacket(clusterLink *link) {
{
char ip[NET_IP_STR_LEN];
- if (anetSockName(link->fd,ip,sizeof(ip),NULL) != -1 &&
+ if (connSockName(link->conn,ip,sizeof(ip),NULL) != -1 &&
strcmp(ip,myself->ip))
{
memcpy(myself->ip,ip,NET_IP_STR_LEN);
@@ -2118,35 +2145,76 @@ void handleLinkIOError(clusterLink *link) {
/* Send data. This is handled using a trivial send buffer that gets
* consumed by write(). We don't try to optimize this for speed too much
* as this is a very low traffic channel. */
-void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
- clusterLink *link = (clusterLink*) privdata;
+void clusterWriteHandler(connection *conn) {
+ clusterLink *link = connGetPrivateData(conn);
ssize_t nwritten;
- UNUSED(el);
- UNUSED(mask);
- nwritten = write(fd, link->sndbuf, sdslen(link->sndbuf));
+ nwritten = connWrite(conn, link->sndbuf, sdslen(link->sndbuf));
if (nwritten <= 0) {
serverLog(LL_DEBUG,"I/O error writing to node link: %s",
- (nwritten == -1) ? strerror(errno) : "short write");
+ (nwritten == -1) ? connGetLastError(conn) : "short write");
handleLinkIOError(link);
return;
}
sdsrange(link->sndbuf,nwritten,-1);
if (sdslen(link->sndbuf) == 0)
- aeDeleteFileEvent(server.el, link->fd, AE_WRITABLE);
+ connSetWriteHandler(link->conn, NULL);
+}
+
+/* A connect handler that gets called when a connection attempt to another
+ * node completes; the connection state tells success from failure.
+ */
+void clusterLinkConnectHandler(connection *conn) {
+ clusterLink *link = connGetPrivateData(conn);
+ clusterNode *node = link->node;
+
+ /* Check if connection succeeded */
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
+ serverLog(LL_VERBOSE, "Connection with Node %.40s at %s:%d failed: %s",
+ node->name, node->ip, node->cport,
+ connGetLastError(conn));
+ freeClusterLink(link);
+ return;
+ }
+
+ /* Register a read handler from now on */
+ connSetReadHandler(conn, clusterReadHandler);
+
+ /* Queue a PING in the new connection ASAP: this is crucial
+ * to avoid false positives in failure detection.
+ *
+ * If the node is flagged as MEET, we send a MEET message instead
+ * of a PING one, to force the receiver to add us in its node
+ * table. */
+ mstime_t old_ping_sent = node->ping_sent;
+ clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ?
+ CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING);
+ if (old_ping_sent) {
+ /* If there was an active ping before the link was
+ * disconnected, we want to restore the ping time, otherwise
+ * replaced by the clusterSendPing() call. */
+ node->ping_sent = old_ping_sent;
+ }
+ /* We can clear the flag after the first packet is sent.
+ * If we'll never receive a PONG, we'll never send new packets
+ * to this node. Instead after the PONG is received and we
+ * are no longer in meet/handshake status, we want to send
+ * normal PING packets. */
+ node->flags &= ~CLUSTER_NODE_MEET;
+
+ serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d",
+ node->name, node->ip, node->cport);
}
/* Read data. Try to read the first field of the header first to check the
* full length of the packet. When a whole packet is in memory this function
* will call the function to process the packet. And so forth. */
-void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
+void clusterReadHandler(connection *conn) {
clusterMsg buf[1];
ssize_t nread;
clusterMsg *hdr;
- clusterLink *link = (clusterLink*) privdata;
+ clusterLink *link = connGetPrivateData(conn);
unsigned int readlen, rcvbuflen;
- UNUSED(el);
- UNUSED(mask);
while(1) { /* Read as long as there is data to read. */
rcvbuflen = sdslen(link->rcvbuf);
@@ -2174,13 +2242,13 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
if (readlen > sizeof(buf)) readlen = sizeof(buf);
}
- nread = read(fd,buf,readlen);
- if (nread == -1 && errno == EAGAIN) return; /* No more data ready. */
+ nread = connRead(conn,buf,readlen);
+ if (nread == -1 && (connGetState(conn) == CONN_STATE_CONNECTED)) return; /* No more data ready. */
if (nread <= 0) {
/* I/O error... */
serverLog(LL_DEBUG,"I/O error reading from node link: %s",
- (nread == 0) ? "connection closed" : strerror(errno));
+ (nread == 0) ? "connection closed" : connGetLastError(conn));
handleLinkIOError(link);
return;
} else {
@@ -2209,8 +2277,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* from event handlers that will do stuff with the same link later. */
void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) {
if (sdslen(link->sndbuf) == 0 && msglen != 0)
- aeCreateFileEvent(server.el,link->fd,AE_WRITABLE|AE_BARRIER,
- clusterWriteHandler,link);
+ connSetWriteHandlerWithBarrier(link->conn, clusterWriteHandler, 1);
link->sndbuf = sdscatlen(link->sndbuf, msg, msglen);
@@ -2276,11 +2343,12 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) {
}
/* Handle cluster-announce-port as well. */
+ int port = server.tls_cluster ? server.tls_port : server.port;
int announced_port = server.cluster_announce_port ?
- server.cluster_announce_port : server.port;
+ server.cluster_announce_port : port;
int announced_cport = server.cluster_announce_bus_port ?
server.cluster_announce_bus_port :
- (server.port + CLUSTER_PORT_INCR);
+ (port + CLUSTER_PORT_INCR);
memcpy(hdr->myslots,master->slots,sizeof(hdr->myslots));
memset(hdr->slaveof,0,CLUSTER_NAMELEN);
@@ -3385,13 +3453,11 @@ void clusterCron(void) {
}
if (node->link == NULL) {
- int fd;
- mstime_t old_ping_sent;
- clusterLink *link;
-
- fd = anetTcpNonBlockBindConnect(server.neterr, node->ip,
- node->cport, NET_FIRST_BIND_ADDR);
- if (fd == -1) {
+ clusterLink *link = createClusterLink(node);
+ link->conn = server.tls_cluster ? connCreateTLS() : connCreateSocket();
+ connSetPrivateData(link->conn, link);
+ if (connConnect(link->conn, node->ip, node->cport, NET_FIRST_BIND_ADDR,
+ clusterLinkConnectHandler) == -1) {
/* We got a synchronous error from connect before
* clusterSendPing() had a chance to be called.
* If node->ping_sent is zero, failure detection can't work,
@@ -3401,37 +3467,11 @@ void clusterCron(void) {
serverLog(LL_DEBUG, "Unable to connect to "
"Cluster Node [%s]:%d -> %s", node->ip,
node->cport, server.neterr);
+
+ freeClusterLink(link);
continue;
}
- link = createClusterLink(node);
- link->fd = fd;
node->link = link;
- aeCreateFileEvent(server.el,link->fd,AE_READABLE,
- clusterReadHandler,link);
- /* Queue a PING in the new connection ASAP: this is crucial
- * to avoid false positives in failure detection.
- *
- * If the node is flagged as MEET, we send a MEET message instead
- * of a PING one, to force the receiver to add us in its node
- * table. */
- old_ping_sent = node->ping_sent;
- clusterSendPing(link, node->flags & CLUSTER_NODE_MEET ?
- CLUSTERMSG_TYPE_MEET : CLUSTERMSG_TYPE_PING);
- if (old_ping_sent) {
- /* If there was an active ping before the link was
- * disconnected, we want to restore the ping time, otherwise
- * replaced by the clusterSendPing() call. */
- node->ping_sent = old_ping_sent;
- }
- /* We can clear the flag after the first packet is sent.
- * If we'll never receive a PONG, we'll never send new packets
- * to this node. Instead after the PONG is received and we
- * are no longer in meet/handshake status, we want to send
- * normal PING packets. */
- node->flags &= ~CLUSTER_NODE_MEET;
-
- serverLog(LL_DEBUG,"Connecting with Node %.40s at %s:%d",
- node->name, node->ip, node->cport);
}
}
dictReleaseIterator(di);
@@ -4942,7 +4982,7 @@ void restoreCommand(client *c) {
#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached sockets after 10 sec. */
typedef struct migrateCachedSocket {
- int fd;
+ connection *conn;
long last_dbid;
time_t last_use_time;
} migrateCachedSocket;
@@ -4959,7 +4999,7 @@ typedef struct migrateCachedSocket {
* should be called so that the connection will be created from scratch
* the next time. */
migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) {
- int fd;
+ connection *conn;
sds name = sdsempty();
migrateCachedSocket *cs;
@@ -4979,34 +5019,27 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti
/* Too many items, drop one at random. */
dictEntry *de = dictGetRandomKey(server.migrate_cached_sockets);
cs = dictGetVal(de);
- close(cs->fd);
+ connClose(cs->conn);
zfree(cs);
dictDelete(server.migrate_cached_sockets,dictGetKey(de));
}
/* Create the socket */
- fd = anetTcpNonBlockConnect(server.neterr,c->argv[1]->ptr,
- atoi(c->argv[2]->ptr));
- if (fd == -1) {
- sdsfree(name);
- addReplyErrorFormat(c,"Can't connect to target node: %s",
- server.neterr);
- return NULL;
- }
- anetEnableTcpNoDelay(server.neterr,fd);
-
- /* Check if it connects within the specified timeout. */
- if ((aeWait(fd,AE_WRITABLE,timeout) & AE_WRITABLE) == 0) {
- sdsfree(name);
+ conn = server.tls_cluster ? connCreateTLS() : connCreateSocket();
+ if (connBlockingConnect(conn, c->argv[1]->ptr, atoi(c->argv[2]->ptr), timeout)
+ != C_OK) {
addReplySds(c,
sdsnew("-IOERR error or timeout connecting to the client\r\n"));
- close(fd);
+ connClose(conn);
+ sdsfree(name);
return NULL;
}
+ connEnableTcpNoDelay(conn);
/* Add to the cache and return it to the caller. */
cs = zmalloc(sizeof(*cs));
- cs->fd = fd;
+ cs->conn = conn;
+
cs->last_dbid = -1;
cs->last_use_time = server.unixtime;
dictAdd(server.migrate_cached_sockets,name,cs);
@@ -5027,7 +5060,7 @@ void migrateCloseSocket(robj *host, robj *port) {
return;
}
- close(cs->fd);
+ connClose(cs->conn);
zfree(cs);
dictDelete(server.migrate_cached_sockets,name);
sdsfree(name);
@@ -5041,7 +5074,7 @@ void migrateCloseTimedoutSockets(void) {
migrateCachedSocket *cs = dictGetVal(de);
if ((server.unixtime - cs->last_use_time) > MIGRATE_SOCKET_CACHE_TTL) {
- close(cs->fd);
+ connClose(cs->conn);
zfree(cs);
dictDelete(server.migrate_cached_sockets,dictGetKey(de));
}
@@ -5223,7 +5256,7 @@ try_again:
while ((towrite = sdslen(buf)-pos) > 0) {
towrite = (towrite > (64*1024) ? (64*1024) : towrite);
- nwritten = syncWrite(cs->fd,buf+pos,towrite,timeout);
+ nwritten = connSyncWrite(cs->conn,buf+pos,towrite,timeout);
if (nwritten != (signed)towrite) {
write_error = 1;
goto socket_err;
@@ -5237,11 +5270,11 @@ try_again:
char buf2[1024]; /* Restore reply. */
/* Read the AUTH reply if needed. */
- if (password && syncReadLine(cs->fd, buf0, sizeof(buf0), timeout) <= 0)
+ if (password && connSyncReadLine(cs->conn, buf0, sizeof(buf0), timeout) <= 0)
goto socket_err;
/* Read the SELECT reply if needed. */
- if (select && syncReadLine(cs->fd, buf1, sizeof(buf1), timeout) <= 0)
+ if (select && connSyncReadLine(cs->conn, buf1, sizeof(buf1), timeout) <= 0)
goto socket_err;
/* Read the RESTORE replies. */
@@ -5256,7 +5289,7 @@ try_again:
if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1));
for (j = 0; j < num_keys; j++) {
- if (syncReadLine(cs->fd, buf2, sizeof(buf2), timeout) <= 0) {
+ if (connSyncReadLine(cs->conn, buf2, sizeof(buf2), timeout) <= 0) {
socket_error = 1;
break;
}
diff --git a/src/cluster.h b/src/cluster.h
index 571b9c543..ffbb29f0d 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -40,7 +40,7 @@ struct clusterNode;
/* clusterLink encapsulates everything needed to talk with a remote node. */
typedef struct clusterLink {
mstime_t ctime; /* Link creation time */
- int fd; /* TCP socket file descriptor */
+ connection *conn; /* Connection to remote node */
sds sndbuf; /* Packet send buffer */
sds rcvbuf; /* Packet reception buffer */
struct clusterNode *node; /* Node related to this link if any, or NULL */
diff --git a/src/config.c b/src/config.c
index 72fb038ea..505dabc9c 100644
--- a/src/config.c
+++ b/src/config.c
@@ -220,7 +220,7 @@ void queueLoadModule(sds path, sds *argv, int argc) {
}
void loadServerConfigFromString(char *config) {
- char *err = NULL;
+ const char *err = NULL;
int linenum = 0, totlines, i;
int slaveof_linenum = 0;
sds *lines;
@@ -515,6 +515,12 @@ void loadServerConfigFromString(char *config) {
err = "rdb-key-save-delay can't be negative";
goto loaderr;
}
+ } else if (!strcasecmp(argv[0],"key-load-delay") && argc==2) {
+ server.key_load_delay = atoi(argv[1]);
+ if (server.key_load_delay < 0) {
+ err = "key-load-delay can't be negative";
+ goto loaderr;
+ }
} else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) {
err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN";
@@ -797,6 +803,45 @@ void loadServerConfigFromString(char *config) {
err = sentinelHandleConfiguration(argv+1,argc-1);
if (err) goto loaderr;
}
+#ifdef USE_OPENSSL
+        } else if (!strcasecmp(argv[0],"tls-port") && argc == 2) {
+            server.tls_port = atoi(argv[1]);
+            /* Validate the value that was just parsed. Checking server.port
+             * here would let any out-of-range tls-port through. */
+            if (server.tls_port < 0 || server.tls_port > 65535) {
+                err = "Invalid tls-port"; goto loaderr;
+            }
+ } else if (!strcasecmp(argv[0],"tls-cluster") && argc == 2) {
+ server.tls_cluster = yesnotoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-replication") && argc == 2) {
+ server.tls_replication = yesnotoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-auth-clients") && argc == 2) {
+ server.tls_auth_clients = yesnotoi(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-cert-file") && argc == 2) {
+ zfree(server.tls_ctx_config.cert_file);
+ server.tls_ctx_config.cert_file = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-key-file") && argc == 2) {
+ zfree(server.tls_ctx_config.key_file);
+ server.tls_ctx_config.key_file = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-dh-params-file") && argc == 2) {
+ zfree(server.tls_ctx_config.dh_params_file);
+ server.tls_ctx_config.dh_params_file = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-ca-cert-file") && argc == 2) {
+ zfree(server.tls_ctx_config.ca_cert_file);
+ server.tls_ctx_config.ca_cert_file = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-ca-cert-dir") && argc == 2) {
+ zfree(server.tls_ctx_config.ca_cert_dir);
+ server.tls_ctx_config.ca_cert_dir = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-protocols") && argc >= 2) {
+ zfree(server.tls_ctx_config.protocols);
+ server.tls_ctx_config.protocols = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-ciphers") && argc == 2) {
+ zfree(server.tls_ctx_config.ciphers);
+ server.tls_ctx_config.ciphers = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-ciphersuites") && argc == 2) {
+ zfree(server.tls_ctx_config.ciphersuites);
+ server.tls_ctx_config.ciphersuites = zstrdup(argv[1]);
+ } else if (!strcasecmp(argv[0],"tls-prefer-server-ciphers") && argc == 2) {
+ server.tls_ctx_config.prefer_server_ciphers = yesnotoi(argv[1]);
+#endif /* USE_OPENSSL */
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
@@ -1171,6 +1216,8 @@ void configSetCommand(client *c) {
} config_set_numerical_field(
"rdb-key-save-delay",server.rdb_key_save_delay,0,LLONG_MAX) {
} config_set_numerical_field(
+ "key-load-delay",server.key_load_delay,0,LLONG_MAX) {
+ } config_set_numerical_field(
"slave-announce-port",server.slave_announce_port,0,65535) {
} config_set_numerical_field(
"replica-announce-port",server.slave_announce_port,0,65535) {
@@ -1239,7 +1286,100 @@ void configSetCommand(client *c) {
"appendfsync",server.aof_fsync,aof_fsync_enum) {
} config_set_enum_field(
"repl-diskless-load",server.repl_diskless_load,repl_diskless_load_enum) {
-
+#ifdef USE_OPENSSL
+ /* TLS fields. */
+ } config_set_special_field("tls-cert-file") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.cert_file = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-cert-file. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.cert_file);
+ server.tls_ctx_config.cert_file = zstrdup(o->ptr);
+ } config_set_special_field("tls-key-file") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.key_file = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-key-file. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.key_file);
+ server.tls_ctx_config.key_file = zstrdup(o->ptr);
+ } config_set_special_field("tls-dh-params-file") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.dh_params_file = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-dh-params-file. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.dh_params_file);
+ server.tls_ctx_config.dh_params_file = zstrdup(o->ptr);
+ } config_set_special_field("tls-ca-cert-file") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.ca_cert_file = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-ca-cert-file. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.ca_cert_file);
+ server.tls_ctx_config.ca_cert_file = zstrdup(o->ptr);
+ } config_set_special_field("tls-ca-cert-dir") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.ca_cert_dir = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-ca-cert-dir. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.ca_cert_dir);
+ server.tls_ctx_config.ca_cert_dir = zstrdup(o->ptr);
+ } config_set_bool_field("tls-auth-clients", server.tls_auth_clients) {
+ } config_set_bool_field("tls-replication", server.tls_replication) {
+ } config_set_bool_field("tls-cluster", server.tls_cluster) {
+ } config_set_special_field("tls-protocols") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.protocols = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-protocols. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.protocols);
+ server.tls_ctx_config.protocols = zstrdup(o->ptr);
+ } config_set_special_field("tls-ciphers") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.ciphers = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-ciphers. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.ciphers);
+ server.tls_ctx_config.ciphers = zstrdup(o->ptr);
+ } config_set_special_field("tls-ciphersuites") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.ciphersuites = (char *) o->ptr;
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c,
+ "Unable to configure tls-ciphersuites. Check server logs.");
+ return;
+ }
+ zfree(server.tls_ctx_config.ciphersuites);
+ server.tls_ctx_config.ciphersuites = zstrdup(o->ptr);
+ } config_set_special_field("tls-prefer-server-ciphers") {
+ redisTLSContextConfig tmpctx = server.tls_ctx_config;
+ tmpctx.prefer_server_ciphers = yesnotoi(o->ptr);
+ if (tlsConfigure(&tmpctx) == C_ERR) {
+ addReplyError(c, "Unable to reconfigure TLS. Check server logs.");
+ return;
+ }
+ server.tls_ctx_config.prefer_server_ciphers = tmpctx.prefer_server_ciphers;
+#endif /* USE_OPENSSL */
/* Everything else is an error... */
} config_set_else {
addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s",
@@ -1313,6 +1453,16 @@ void configGetCommand(client *c) {
config_get_string_field("pidfile",server.pidfile);
config_get_string_field("slave-announce-ip",server.slave_announce_ip);
config_get_string_field("replica-announce-ip",server.slave_announce_ip);
+#ifdef USE_OPENSSL
+ config_get_string_field("tls-cert-file",server.tls_ctx_config.cert_file);
+ config_get_string_field("tls-key-file",server.tls_ctx_config.key_file);
+ config_get_string_field("tls-dh-params-file",server.tls_ctx_config.dh_params_file);
+ config_get_string_field("tls-ca-cert-file",server.tls_ctx_config.ca_cert_file);
+ config_get_string_field("tls-ca-cert-dir",server.tls_ctx_config.ca_cert_dir);
+ config_get_string_field("tls-protocols",server.tls_ctx_config.protocols);
+ config_get_string_field("tls-ciphers",server.tls_ctx_config.ciphers);
+ config_get_string_field("tls-ciphersuites",server.tls_ctx_config.ciphersuites);
+#endif
/* Numerical values */
config_get_numerical_field("maxmemory",server.maxmemory);
@@ -1360,6 +1510,7 @@ void configGetCommand(client *c) {
config_get_numerical_field("slowlog-max-len", server.slowlog_max_len);
config_get_numerical_field("tracking-table-max-fill", server.tracking_table_max_fill);
config_get_numerical_field("port",server.port);
+ config_get_numerical_field("tls-port",server.tls_port);
config_get_numerical_field("cluster-announce-port",server.cluster_announce_port);
config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port);
config_get_numerical_field("tcp-backlog",server.tcp_backlog);
@@ -1387,6 +1538,7 @@ void configGetCommand(client *c) {
config_get_numerical_field("cluster-replica-validity-factor",server.cluster_slave_validity_factor);
config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay);
config_get_numerical_field("rdb-key-save-delay",server.rdb_key_save_delay);
+ config_get_numerical_field("key-load-delay",server.key_load_delay);
config_get_numerical_field("tcp-keepalive",server.tcpkeepalive);
/* Bool (yes/no) values */
@@ -1399,7 +1551,11 @@ void configGetCommand(client *c) {
}
config_get_bool_field("activedefrag", server.active_defrag_enabled);
-
+ config_get_bool_field("tls-cluster",server.tls_cluster);
+ config_get_bool_field("tls-replication",server.tls_replication);
+ config_get_bool_field("tls-auth-clients",server.tls_auth_clients);
+ config_get_bool_field("tls-prefer-server-ciphers",
+ server.tls_ctx_config.prefer_server_ciphers);
/* Enum values */
config_get_enum_field("maxmemory-policy",
server.maxmemory_policy,maxmemory_policy_enum);
@@ -1513,6 +1669,7 @@ void configGetCommand(client *c) {
}
matches++;
}
+
setDeferredMapLen(c,replylen,matches);
}
@@ -2119,7 +2276,7 @@ int rewriteConfig(char *path) {
}
rewriteConfigStringOption(state,"pidfile",server.pidfile,CONFIG_DEFAULT_PID_FILE);
- rewriteConfigNumericalOption(state,"port",server.port,CONFIG_DEFAULT_SERVER_PORT);
+ rewriteConfigNumericalOption(state,"tls-port",server.tls_port,CONFIG_DEFAULT_SERVER_TLS_PORT);
rewriteConfigNumericalOption(state,"cluster-announce-port",server.cluster_announce_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT);
rewriteConfigNumericalOption(state,"cluster-announce-bus-port",server.cluster_announce_bus_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT);
rewriteConfigNumericalOption(state,"tcp-backlog",server.tcp_backlog,CONFIG_DEFAULT_TCP_BACKLOG);
@@ -2201,6 +2358,21 @@ int rewriteConfig(char *path) {
rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ);
rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE);
rewriteConfigNumericalOption(state,"rdb-key-save-delay",server.rdb_key_save_delay,CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY);
+ rewriteConfigNumericalOption(state,"key-load-delay",server.key_load_delay,CONFIG_DEFAULT_KEY_LOAD_DELAY);
+#ifdef USE_OPENSSL
+ rewriteConfigYesNoOption(state,"tls-cluster",server.tls_cluster,0);
+ rewriteConfigYesNoOption(state,"tls-replication",server.tls_replication,0);
+ rewriteConfigYesNoOption(state,"tls-auth-clients",server.tls_auth_clients,1);
+ rewriteConfigStringOption(state,"tls-cert-file",server.tls_ctx_config.cert_file,NULL);
+ rewriteConfigStringOption(state,"tls-key-file",server.tls_ctx_config.key_file,NULL);
+ rewriteConfigStringOption(state,"tls-dh-params-file",server.tls_ctx_config.dh_params_file,NULL);
+ rewriteConfigStringOption(state,"tls-ca-cert-file",server.tls_ctx_config.ca_cert_file,NULL);
+ rewriteConfigStringOption(state,"tls-ca-cert-dir",server.tls_ctx_config.ca_cert_dir,NULL);
+ rewriteConfigStringOption(state,"tls-protocols",server.tls_ctx_config.protocols,NULL);
+ rewriteConfigStringOption(state,"tls-ciphers",server.tls_ctx_config.ciphers,NULL);
+ rewriteConfigStringOption(state,"tls-ciphersuites",server.tls_ctx_config.ciphersuites,NULL);
+ rewriteConfigYesNoOption(state,"tls-prefer-server-ciphers",server.tls_ctx_config.prefer_server_ciphers,0);
+#endif
/* Rewrite Sentinel config if in Sentinel mode. */
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
diff --git a/src/connection.c b/src/connection.c
new file mode 100644
index 000000000..58d86c31b
--- /dev/null
+++ b/src/connection.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "server.h"
+#include "connhelpers.h"
+
+/* The connections module provides a lean abstraction of network connections
+ * to avoid direct socket and async event management across the Redis code base.
+ *
+ * It does NOT provide advanced connection features commonly found in similar
+ * libraries such as complete in/out buffer management, throttling, etc. These
+ * functions remain in networking.c.
+ *
+ * The primary goal is to allow transparent handling of TCP and TLS based
+ * connections. To do so, connections have the following properties:
+ *
+ * 1. A connection may live before its corresponding socket exists. This
+ * allows various context and configuration setting to be handled before
+ * establishing the actual connection.
+ * 2. The caller may register/unregister logical read/write handlers to be
+ * called when the connection has data to read from/can accept writes.
+ * These logical handlers may or may not correspond to actual AE events,
+ * depending on the implementation (for TCP they are; for TLS they aren't).
+ */
+
+ConnectionType CT_Socket;
+
+/* When a connection is created we must know its type already, but the
+ * underlying socket may or may not exist:
+ *
+ * - For accepted connections, it exists as we do not model the listen/accept
+ * part; So caller calls connCreateSocket() followed by connAccept().
+ * - For outgoing connections, the socket is created by the connection module
+ * itself; So caller calls connCreateSocket() followed by connConnect(),
+ * which registers a connect callback that fires on connected/error state
+ * (and after any transport level handshake was done).
+ *
+ * NOTE: An earlier version relied on connections being part of other structs
+ * and not independently allocated. This could lead to further optimizations
+ * like using container_of(), etc. However it was discontinued in favor of
+ * this approach for these reasons:
+ *
+ * 1. In some cases conns are created/handled outside the context of the
+ * containing struct, in which case it gets a bit awkward to copy them.
+ * 2. Future implementations may wish to allocate arbitrary data for the
+ * connection.
+ * 3. The container_of() approach is anyway risky because connections may
+ * be embedded in different structs, not just client.
+ */
+
+/* Allocate a zero-initialized socket-type connection that has no file
+ * descriptor yet (fd is -1). */
+connection *connCreateSocket() {
+    connection *c = zcalloc(sizeof(*c));
+
+    c->fd = -1;
+    c->type = &CT_Socket;
+    return c;
+}
+
+/* Wrap an already-accepted file descriptor in a new socket-type connection.
+ *
+ * The connection starts in CONN_STATE_ACCEPTING: the socket is not ready
+ * for I/O until connAccept() was called and invoked the connection-level
+ * accept handler.
+ */
+connection *connCreateAcceptedSocket(int fd) {
+    connection *accepted = connCreateSocket();
+
+    accepted->state = CONN_STATE_ACCEPTING;
+    accepted->fd = fd;
+    return accepted;
+}
+
+/* Start a non-blocking outgoing connection to addr:port (optionally bound
+ * to src_addr) and register connect_handler as the connect callback.
+ *
+ * On success the connection moves to CONN_STATE_CONNECTING, a writable
+ * event is installed on the new fd and C_OK is returned. If the socket
+ * cannot be created, the state becomes CONN_STATE_ERROR, errno is saved in
+ * last_errno and C_ERR is returned. */
+static int connSocketConnect(connection *conn, const char *addr, int port, const char *src_addr,
+        ConnectionCallbackFunc connect_handler) {
+    int sock = anetTcpNonBlockBestEffortBindConnect(NULL,addr,port,src_addr);
+
+    if (sock != -1) {
+        conn->fd = sock;
+        conn->state = CONN_STATE_CONNECTING;
+        conn->conn_handler = connect_handler;
+        aeCreateFileEvent(server.el, conn->fd, AE_WRITABLE,
+                conn->type->ae_handler, conn);
+        return C_OK;
+    }
+
+    conn->state = CONN_STATE_ERROR;
+    conn->last_errno = errno;
+    return C_ERR;
+}
+
+/* Return 1 when a write handler is currently installed, 0 otherwise. */
+int connHasWriteHandler(connection *conn) {
+    return conn->write_handler ? 1 : 0;
+}
+
+/* Return 1 when a read handler is currently installed, 0 otherwise. */
+int connHasReadHandler(connection *conn) {
+    return conn->read_handler ? 1 : 0;
+}
+
+/* Associate a private data pointer with the connection.
+ *
+ * The pointer is stored as-is in conn->private_data and replaces any value
+ * set previously; read it back with connGetPrivateData(). */
+void connSetPrivateData(connection *conn, void *data) {
+ conn->private_data = data;
+}
+
+/* Get the private data pointer last stored with connSetPrivateData().
+ * Connections are allocated zeroed, so this is NULL if none was set. */
+void *connGetPrivateData(connection *conn) {
+ return conn->private_data;
+}
+
+/* ------ Pure socket connections ------- */
+
+/* A very incomplete list of implementation-specific calls. Much of the above shall
+ * move here as we implement additional connection types.
+ */
+
+/* Close the connection and free resources.
+ *
+ * Any open fd is removed from the event loop and closed right away. The
+ * connection object itself is freed immediately unless a handler is
+ * currently running on it, in which case the close is only flagged. */
+static void connSocketClose(connection *conn) {
+    int fd = conn->fd;
+
+    if (fd != -1) {
+        aeDeleteFileEvent(server.el,fd,AE_READABLE);
+        aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
+        close(fd);
+        conn->fd = -1;
+    }
+
+    /* Not inside a handler: nobody else is using the object, release it. */
+    if (!(conn->flags & CONN_FLAG_IN_HANDLER)) {
+        zfree(conn);
+        return;
+    }
+
+    /* Called from within a handler: schedule the close but keep the
+     * connection until the handler returns. */
+    conn->flags |= CONN_FLAG_CLOSE_SCHEDULED;
+}
+
+/* Write up to data_len bytes with write(2) and return its result.
+ * A failure other than EAGAIN is saved in last_errno and moves the
+ * connection to CONN_STATE_ERROR. */
+static int connSocketWrite(connection *conn, const void *data, size_t data_len) {
+    int nwritten = write(conn->fd, data, data_len);
+
+    /* Success, or a would-block condition: no state change. */
+    if (nwritten >= 0 || errno == EAGAIN) return nwritten;
+
+    conn->last_errno = errno;
+    conn->state = CONN_STATE_ERROR;
+    return nwritten;
+}
+
+/* Read up to buf_len bytes with read(2) and return its result.
+ * A return of 0 marks the connection CONN_STATE_CLOSED; a failure other
+ * than EAGAIN is saved in last_errno and marks it CONN_STATE_ERROR. */
+static int connSocketRead(connection *conn, void *buf, size_t buf_len) {
+    int nread = read(conn->fd, buf, buf_len);
+
+    if (nread == 0) {
+        conn->state = CONN_STATE_CLOSED;
+        return nread;
+    }
+
+    if (nread < 0 && errno != EAGAIN) {
+        conn->last_errno = errno;
+        conn->state = CONN_STATE_ERROR;
+    }
+    return nread;
+}
+
+/* Complete the accept of a connection created in CONN_STATE_ACCEPTING:
+ * mark it connected and invoke accept_handler through callHandler().
+ * Returns C_ERR if the connection is not in the accepting state or if
+ * callHandler() returns zero, C_OK otherwise. */
+static int connSocketAccept(connection *conn, ConnectionCallbackFunc accept_handler) {
+    if (conn->state != CONN_STATE_ACCEPTING) return C_ERR;
+
+    conn->state = CONN_STATE_CONNECTED;
+    return callHandler(conn, accept_handler) ? C_OK : C_ERR;
+}
+
+/* Install func as the write handler, invoked when the connection is
+ * writable. Passing NULL removes the current handler. Setting the handler
+ * that is already installed is a no-op (the barrier flag is left as is).
+ *
+ * A non-zero barrier sets CONN_FLAG_WRITE_BARRIER, zero clears it. With the
+ * flag set the write handler is always called before, never after, the
+ * read handler within a single event loop iteration.
+ *
+ * Returns C_ERR only if the writable event cannot be registered.
+ */
+static int connSocketSetWriteHandler(connection *conn, ConnectionCallbackFunc func, int barrier) {
+    if (func == conn->write_handler) return C_OK;
+
+    conn->write_handler = func;
+
+    if (barrier) {
+        conn->flags |= CONN_FLAG_WRITE_BARRIER;
+    } else {
+        conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
+    }
+
+    if (func == NULL) {
+        aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
+        return C_OK;
+    }
+
+    if (aeCreateFileEvent(server.el,conn->fd,AE_WRITABLE,
+                conn->type->ae_handler,conn) == AE_ERR) {
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Install func as the read handler, invoked when the connection is
+ * readable. Passing NULL removes the current handler; setting the handler
+ * that is already installed is a no-op.
+ *
+ * Returns C_ERR only if the readable event cannot be registered.
+ */
+static int connSocketSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+    if (func == conn->read_handler) return C_OK;
+
+    conn->read_handler = func;
+
+    if (func == NULL) {
+        aeDeleteFileEvent(server.el,conn->fd,AE_READABLE);
+        return C_OK;
+    }
+
+    if (aeCreateFileEvent(server.el,conn->fd,AE_READABLE,
+                conn->type->ae_handler,conn) == AE_ERR) {
+        return C_ERR;
+    }
+    return C_OK;
+}
+
+/* Return the strerror() text for the errno value last recorded in
+ * conn->last_errno by the socket read/write/connect paths. */
+static const char *connSocketGetLastError(connection *conn) {
+ return strerror(conn->last_errno);
+}
+
+/* AE file-event callback shared by all socket connections.
+ *
+ * clientData is the connection; mask tells which of AE_READABLE and
+ * AE_WRITABLE fired. The function first completes a pending non-blocking
+ * connect (if any), then dispatches to the registered read and write
+ * handlers. Every handler is invoked through callHandler(); when that
+ * returns zero the function returns immediately without touching conn
+ * again.
+ */
+static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask)
+{
+    UNUSED(el);
+    UNUSED(fd);
+    connection *conn = clientData;
+
+    if (conn->state == CONN_STATE_CONNECTING &&
+            (mask & AE_WRITABLE) && conn->conn_handler) {
+
+        /* connGetSocketError() returns the pending error code itself
+         * (SO_ERROR, or errno if getsockopt() failed). errno is not set
+         * when getsockopt() succeeds, so record the returned value rather
+         * than a possibly stale errno. */
+        int conn_error = connGetSocketError(conn);
+        if (conn_error) {
+            conn->last_errno = conn_error;
+            conn->state = CONN_STATE_ERROR;
+        } else {
+            conn->state = CONN_STATE_CONNECTED;
+        }
+
+        /* The writable event was only installed to detect connect
+         * completion; keep it only if a write handler wants it. */
+        if (!conn->write_handler) aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
+
+        if (!callHandler(conn, conn->conn_handler)) return;
+        conn->conn_handler = NULL;
+    }
+
+    /* Normally we execute the readable event first, and the writable
+     * event later. This is useful as sometimes we may be able
+     * to serve the reply of a query immediately after processing the
+     * query.
+     *
+     * However if WRITE_BARRIER is set in the mask, our application is
+     * asking us to do the reverse: never fire the writable event
+     * after the readable. In such a case, we invert the calls.
+     * This is useful when, for instance, we want to do things
+     * in the beforeSleep() hook, like fsync'ing a file to disk,
+     * before replying to a client. */
+    int invert = conn->flags & CONN_FLAG_WRITE_BARRIER;
+
+    int call_write = (mask & AE_WRITABLE) && conn->write_handler;
+    int call_read = (mask & AE_READABLE) && conn->read_handler;
+
+    /* Handle normal I/O flows */
+    if (!invert && call_read) {
+        if (!callHandler(conn, conn->read_handler)) return;
+    }
+    /* Fire the writable event. */
+    if (call_write) {
+        if (!callHandler(conn, conn->write_handler)) return;
+    }
+    /* If we have to invert the call, fire the readable event now
+     * after the writable one. */
+    if (invert && call_read) {
+        if (!callHandler(conn, conn->read_handler)) return;
+    }
+}
+
+/* Connect to addr:port and wait, for at most 'timeout' (passed unchanged to
+ * aeWait()), until the socket becomes writable.
+ *
+ * Returns C_OK with the connection in CONN_STATE_CONNECTED, or C_ERR with
+ * the connection in CONN_STATE_ERROR and last_errno set (errno from the
+ * failed socket setup, or ETIMEDOUT when the wait expired). */
+static int connSocketBlockingConnect(connection *conn, const char *addr, int port, long long timeout) {
+    int fd = anetTcpNonBlockConnect(NULL,addr,port);
+    if (fd == -1) {
+        conn->state = CONN_STATE_ERROR;
+        conn->last_errno = errno;
+        return C_ERR;
+    }
+
+    /* Hand the descriptor to the connection before waiting, so that on
+     * failure the caller's close (connSocketClose) also closes the fd. */
+    conn->fd = fd;
+
+    if ((aeWait(fd, AE_WRITABLE, timeout) & AE_WRITABLE) == 0) {
+        /* Not writable in time: report the failure instead of falling
+         * through and claiming the connection is established. */
+        conn->state = CONN_STATE_ERROR;
+        conn->last_errno = ETIMEDOUT;
+        return C_ERR;
+    }
+
+    conn->state = CONN_STATE_CONNECTED;
+    return C_OK;
+}
+
+/* Connection-based versions of syncio.c functions.
+ * NOTE: This should ideally be refactored out in favor of pure async work.
+ */
+
+/* Socket implementation of sync_write: forwards ptr/size/timeout unchanged
+ * to syncWrite() on the connection's fd and returns its result. */
+static ssize_t connSocketSyncWrite(connection *conn, char *ptr, ssize_t size, long long timeout) {
+ return syncWrite(conn->fd, ptr, size, timeout);
+}
+
+/* Socket implementation of sync_read: forwards ptr/size/timeout unchanged
+ * to syncRead() on the connection's fd and returns its result. */
+static ssize_t connSocketSyncRead(connection *conn, char *ptr, ssize_t size, long long timeout) {
+ return syncRead(conn->fd, ptr, size, timeout);
+}
+
+/* Socket implementation of sync_readline: forwards ptr/size/timeout
+ * unchanged to syncReadLine() on the connection's fd and returns its
+ * result. */
+static ssize_t connSocketSyncReadLine(connection *conn, char *ptr, ssize_t size, long long timeout) {
+ return syncReadLine(conn->fd, ptr, size, timeout);
+}
+
+
+/* Dispatch table for plain socket connections: binds each ConnectionType
+ * callback to the connSocket*() implementation defined in this file.
+ * connCreateSocket() points conn->type at this table. */
+ConnectionType CT_Socket = {
+ .ae_handler = connSocketEventHandler,
+ .close = connSocketClose,
+ .write = connSocketWrite,
+ .read = connSocketRead,
+ .accept = connSocketAccept,
+ .connect = connSocketConnect,
+ .set_write_handler = connSocketSetWriteHandler,
+ .set_read_handler = connSocketSetReadHandler,
+ .get_last_error = connSocketGetLastError,
+ .blocking_connect = connSocketBlockingConnect,
+ .sync_write = connSocketSyncWrite,
+ .sync_read = connSocketSyncRead,
+ .sync_readline = connSocketSyncReadLine
+};
+
+
+/* Return the pending error on the connection's socket as reported by
+ * SO_ERROR (0 when there is none). If getsockopt() itself fails, its errno
+ * is returned instead. */
+int connGetSocketError(connection *conn) {
+    int pending = 0;
+    socklen_t optlen = sizeof(pending);
+
+    if (getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, &pending, &optlen) != -1)
+        return pending;
+
+    return errno;
+}
+
+/* Resolve the peer address of the connection through anetPeerToString().
+ * A NULL conn is tolerated and handed to anet as fd -1. */
+int connPeerToString(connection *conn, char *ip, size_t ip_len, int *port) {
+    int fd = conn ? conn->fd : -1;
+
+    return anetPeerToString(fd, ip, ip_len, port);
+}
+
+/* Format the peer of the connection into buf through anetFormatPeer().
+ * A NULL conn is tolerated and handed to anet as fd -1. */
+int connFormatPeer(connection *conn, char *buf, size_t buf_len) {
+    int fd = conn ? conn->fd : -1;
+
+    return anetFormatPeer(fd, buf, buf_len);
+}
+
+/* Fetch the local address of the connection's socket by delegating to
+ * anetSockName() and returning its result. Unlike connPeerToString(), conn
+ * is dereferenced unconditionally and so must not be NULL. */
+int connSockName(connection *conn, char *ip, size_t ip_len, int *port) {
+ return anetSockName(conn->fd, ip, ip_len, port);
+}
+
+/* Apply anetBlock() to the connection's socket and return its result.
+ * Returns C_ERR when the connection has no fd yet. */
+int connBlock(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetBlock(NULL, conn->fd);
+}
+
+/* Apply anetNonBlock() to the connection's socket and return its result.
+ * Returns C_ERR when the connection has no fd yet. */
+int connNonBlock(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetNonBlock(NULL, conn->fd);
+}
+
+/* Apply anetEnableTcpNoDelay() to the connection's socket and return its
+ * result. Returns C_ERR when the connection has no fd yet. */
+int connEnableTcpNoDelay(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetEnableTcpNoDelay(NULL, conn->fd);
+}
+
+/* Apply anetDisableTcpNoDelay() to the connection's socket and return its
+ * result. Returns C_ERR when the connection has no fd yet. */
+int connDisableTcpNoDelay(connection *conn) {
+    return (conn->fd == -1) ? C_ERR : anetDisableTcpNoDelay(NULL, conn->fd);
+}
+
+/* Apply anetKeepAlive() with the given interval to the connection's socket
+ * and return its result. Returns C_ERR when the connection has no fd yet. */
+int connKeepAlive(connection *conn, int interval) {
+    return (conn->fd == -1) ? C_ERR : anetKeepAlive(NULL, conn->fd, interval);
+}
+
+/* Set the send timeout on the connection's socket by delegating to
+ * anetSendTimeout() and returning its result.
+ * NOTE(review): unlike connBlock()/connKeepAlive() there is no fd == -1
+ * guard here; presumably anet reports the failure itself -- confirm. */
+int connSendTimeout(connection *conn, long long ms) {
+ return anetSendTimeout(NULL, conn->fd, ms);
+}
+
+/* Set the receive timeout on the connection's socket by delegating to
+ * anetRecvTimeout() and returning its result.
+ * NOTE(review): unlike connBlock()/connKeepAlive() there is no fd == -1
+ * guard here; presumably anet reports the failure itself -- confirm. */
+int connRecvTimeout(connection *conn, long long ms) {
+ return anetRecvTimeout(NULL, conn->fd, ms);
+}
+
+/* Return the connection's current state field (conn->state). */
+int connGetState(connection *conn) {
+ return conn->state;
+}
+
+/* Return a text that describes the connection, suitable for inclusion
+ * in CLIENT LIST and similar outputs.
+ *
+ * For sockets, we always return "fd=<fdnum>" to maintain compatibility.
+ *
+ * The text is written to buf (at most buf_len bytes including the
+ * terminating NUL) and buf itself is returned.
+ */
+const char *connGetInfo(connection *conn, char *buf, size_t buf_len) {
+    /* snprintf() already counts the terminating NUL within its size
+     * argument. Passing buf_len-1 would waste a byte and, for
+     * buf_len == 0, wrap around to SIZE_MAX. */
+    snprintf(buf, buf_len, "fd=%i", conn->fd);
+    return buf;
+}
+
diff --git a/src/connection.h b/src/connection.h
new file mode 100644
index 000000000..97622f8d6
--- /dev/null
+++ b/src/connection.h
@@ -0,0 +1,220 @@
+
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_CONNECTION_H
+#define __REDIS_CONNECTION_H
+
+#define CONN_INFO_LEN 32
+
+struct aeEventLoop;
+typedef struct connection connection;
+
+typedef enum { /* State of a connection, as reported by connGetState(). */
+ CONN_STATE_NONE = 0,
+ CONN_STATE_CONNECTING,
+ CONN_STATE_ACCEPTING,
+ CONN_STATE_CONNECTED, /* While in this state a -1 from connRead()/connWrite() is an EAGAIN-like "retry later". */
+ CONN_STATE_CLOSED,
+ CONN_STATE_ERROR
+} ConnectionState;
+
+#define CONN_FLAG_IN_HANDLER (1<<0) /* A handler execution is in progress */
+#define CONN_FLAG_CLOSE_SCHEDULED (1<<1) /* Close scheduled by a handler; performed once the handler returns */
+#define CONN_FLAG_WRITE_BARRIER (1<<2) /* Write barrier requested */
+
+typedef void (*ConnectionCallbackFunc)(struct connection *conn);
+
+typedef struct ConnectionType { /* Per-transport operation table; the conn*() inline wrappers below dispatch through it. */
+ void (*ae_handler)(struct aeEventLoop *el, int fd, void *clientData, int mask);
+ int (*connect)(struct connection *conn, const char *addr, int port, const char *source_addr, ConnectionCallbackFunc connect_handler);
+ int (*write)(struct connection *conn, const void *data, size_t data_len);
+ int (*read)(struct connection *conn, void *buf, size_t buf_len);
+ void (*close)(struct connection *conn);
+ int (*accept)(struct connection *conn, ConnectionCallbackFunc accept_handler);
+ int (*set_write_handler)(struct connection *conn, ConnectionCallbackFunc handler, int barrier); /* barrier != 0 requests a write barrier */
+ int (*set_read_handler)(struct connection *conn, ConnectionCallbackFunc handler);
+ const char *(*get_last_error)(struct connection *conn);
+ int (*blocking_connect)(struct connection *conn, const char *addr, int port, long long timeout);
+ ssize_t (*sync_write)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+ ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+ ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
+} ConnectionType;
+
+struct connection {
+ ConnectionType *type; /* Operation table every conn*() wrapper dispatches through */
+ ConnectionState state; /* Value returned by connGetState() */
+ int flags; /* Bitmask of CONN_FLAG_* */
+ int last_errno; /* NOTE(review): presumably the errno behind connGetLastError() -- confirm in connection.c */
+ void *private_data; /* Opaque owner pointer; networking.c stores the client here via connSetPrivateData() */
+ ConnectionCallbackFunc conn_handler;
+ ConnectionCallbackFunc write_handler;
+ ConnectionCallbackFunc read_handler;
+ int fd; /* Socket descriptor; the anet-style wrappers treat -1 as "no socket" */
+};
+
+/* The connection module does not deal with listening and accepting sockets,
+ * so we assume we have a socket when an incoming connection is created.
+ *
+ * The fd supplied should therefore be associated with an already accept()ed
+ * socket.
+ *
+ * connAccept() may directly call accept_handler(), or return and call it
+ * at a later time. This behavior is a bit awkward but aims to reduce the need
+ * to wait for the next event loop, if no additional handshake is required.
+ *
+ * The call is dispatched to the accept() operation of the connection's type
+ * and its return value is passed back unchanged; callers treat C_ERR as a
+ * failure.
+ */
+
+static inline int connAccept(connection *conn, ConnectionCallbackFunc accept_handler) {
+ return conn->type->accept(conn, accept_handler);
+}
+
+/* Establish a connection. The connect_handler will be called when the connection
+ * is established, or if an error has occurred.
+ *
+ * The connection handler will be responsible to set up any read/write handlers
+ * as needed.
+ *
+ * If C_ERR is returned, the operation failed and the connection handler shall
+ * not be expected.
+ *
+ * src_addr is forwarded as the source_addr argument of the type's connect()
+ * operation.
+ */
+static inline int connConnect(connection *conn, const char *addr, int port, const char *src_addr,
+ ConnectionCallbackFunc connect_handler) {
+ return conn->type->connect(conn, addr, port, src_addr, connect_handler);
+}
+
+/* Blocking connect.
+ *
+ * NOTE: This is implemented in order to simplify the transition to the abstract
+ * connections, but should probably be refactored out of cluster.c and replication.c,
+ * in favor of a pure async implementation.
+ *
+ * Dispatches to the blocking_connect() operation of the connection's type and
+ * returns its result unchanged.
+ * NOTE(review): timeout is presumably expressed in milliseconds -- confirm
+ * against the implementations.
+ */
+static inline int connBlockingConnect(connection *conn, const char *addr, int port, long long timeout) {
+ return conn->type->blocking_connect(conn, addr, port, timeout);
+}
+
+/* Write to connection, behaves the same as write(2).
+ *
+ * Like write(2), a short write is possible. A -1 return indicates an error.
+ *
+ * The caller should NOT rely on errno. To test for an EAGAIN-like condition, use
+ * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
+ *
+ * NOTE(review): the result is an int while data_len is a size_t, so a single
+ * call cannot report more than INT_MAX bytes written -- confirm callers never
+ * pass larger buffers.
+ */
+static inline int connWrite(connection *conn, const void *data, size_t data_len) {
+ return conn->type->write(conn, data, data_len);
+}
+
+/* Read from the connection, behaves the same as read(2).
+ *
+ * Like read(2), a short read is possible. A return value of 0 will indicate the
+ * connection was closed, and -1 will indicate an error.
+ *
+ * The caller should NOT rely on errno. To test for an EAGAIN-like condition, use
+ * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
+ *
+ * NOTE(review): the result is an int while buf_len is a size_t, so a single
+ * call cannot report more than INT_MAX bytes read -- confirm callers never
+ * pass larger buffers.
+ */
+static inline int connRead(connection *conn, void *buf, size_t buf_len) {
+ return conn->type->read(conn, buf, buf_len);
+}
+
+/* Register a write handler, to be called when the connection is writable.
+ * If NULL, the existing handler is removed.
+ *
+ * The handler is installed without a write barrier (the barrier argument of
+ * the type's set_write_handler() operation is passed as 0).
+ */
+static inline int connSetWriteHandler(connection *conn, ConnectionCallbackFunc func) {
+ return conn->type->set_write_handler(conn, func, 0);
+}
+
+/* Register a read handler, to be called when the connection is readable.
+ * If NULL, the existing handler is removed.
+ *
+ * Returns the result of the type's set_read_handler() operation unchanged.
+ */
+static inline int connSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+ return conn->type->set_read_handler(conn, func);
+}
+
+/* Set a write handler, and possibly enable a write barrier, this flag is
+ * cleared when write handler is changed or removed.
+ * With barrier enabled, we never fire the event if the read handler already
+ * fired in the same event loop iteration. Useful when you want to persist
+ * things to disk before sending replies, and want to do that in a group fashion. */
+static inline int connSetWriteHandlerWithBarrier(connection *conn, ConnectionCallbackFunc func, int barrier) {
+ return conn->type->set_write_handler(conn, func, barrier);
+}
+
+/* Close the connection by delegating to the close() operation of its
+ * ConnectionType. */
+static inline void connClose(connection *conn) {
+    ConnectionType *ops = conn->type;
+
+    ops->close(conn);
+}
+
+/* Returns the last error encountered by the connection, as a string. If no error,
+ * a NULL is returned.
+ *
+ * NOTE(review): callers in networking.c pass the result straight to a "%s"
+ * log format -- confirm it cannot be NULL on those error paths.
+ */
+static inline const char *connGetLastError(connection *conn) {
+ return conn->type->get_last_error(conn);
+}
+
+/* Synchronous write: dispatches to the sync_write() operation of the
+ * connection's type and returns its result unchanged.
+ * NOTE(review): timeout is presumably in milliseconds -- confirm. */
+static inline ssize_t connSyncWrite(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    ConnectionType *ops = conn->type;
+
+    return ops->sync_write(conn, ptr, size, timeout);
+}
+
+/* Synchronous read: dispatches to the sync_read() operation of the
+ * connection's type and returns its result unchanged.
+ * NOTE(review): timeout is presumably in milliseconds -- confirm. */
+static inline ssize_t connSyncRead(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    ConnectionType *ops = conn->type;
+
+    return ops->sync_read(conn, ptr, size, timeout);
+}
+
+/* Synchronous line read: dispatches to the sync_readline() operation of the
+ * connection's type and returns its result unchanged.
+ * NOTE(review): timeout is presumably in milliseconds -- confirm. */
+static inline ssize_t connSyncReadLine(connection *conn, char *ptr, ssize_t size, long long timeout) {
+    ConnectionType *ops = conn->type;
+
+    return ops->sync_readline(conn, ptr, size, timeout);
+}
+
+connection *connCreateSocket();
+connection *connCreateAcceptedSocket(int fd);
+
+connection *connCreateTLS();
+connection *connCreateAcceptedTLS(int fd, int require_auth);
+
+void connSetPrivateData(connection *conn, void *data);
+void *connGetPrivateData(connection *conn);
+int connGetState(connection *conn);
+int connHasWriteHandler(connection *conn);
+int connHasReadHandler(connection *conn);
+int connGetSocketError(connection *conn);
+
+/* anet-style wrappers to conns */
+int connBlock(connection *conn);
+int connNonBlock(connection *conn);
+int connEnableTcpNoDelay(connection *conn);
+int connDisableTcpNoDelay(connection *conn);
+int connKeepAlive(connection *conn, int interval);
+int connSendTimeout(connection *conn, long long ms);
+int connRecvTimeout(connection *conn, long long ms);
+int connPeerToString(connection *conn, char *ip, size_t ip_len, int *port);
+int connFormatPeer(connection *conn, char *buf, size_t buf_len);
+int connSockName(connection *conn, char *ip, size_t ip_len, int *port);
+const char *connGetInfo(connection *conn, char *buf, size_t buf_len);
+
+/* Helpers for tls special considerations */
+int tlsHasPendingData();
+void tlsProcessPendingData();
+
+#endif /* __REDIS_CONNECTION_H */
diff --git a/src/connhelpers.h b/src/connhelpers.h
new file mode 100644
index 000000000..f237c9b1d
--- /dev/null
+++ b/src/connhelpers.h
@@ -0,0 +1,85 @@
+
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __REDIS_CONNHELPERS_H
+#define __REDIS_CONNHELPERS_H
+
+#include "connection.h"
+
+/* These are helper functions that are common to different connection
+ * implementations (currently sockets in connection.c and TLS in tls.c).
+ *
+ * Currently helpers implement the mechanisms for invoking connection
+ * handlers, tracking in-handler states and dealing with deferred
+ * destruction (if invoked by a handler).
+ */
+
+/* Called whenever a handler is invoked on a connection and sets the
+ * CONN_FLAG_IN_HANDLER flag to indicate we're in a handler context.
+ *
+ * An attempt to close a connection while CONN_FLAG_IN_HANDLER is
+ * set will result in a deferred close, i.e. setting the CONN_FLAG_CLOSE_SCHEDULED
+ * flag instead of destructing it.
+ */
+static inline void enterHandler(connection *conn) {
+ conn->flags |= CONN_FLAG_IN_HANDLER;
+}
+
+/* Leave handler context: clear CONN_FLAG_IN_HANDLER and, if the handler
+ * asked for a close in the meantime (CONN_FLAG_CLOSE_SCHEDULED), carry out
+ * that deferred close now.
+ *
+ * Returns 1 when no close was pending, 0 when connClose() was invoked here.
+ */
+static inline int exitHandler(connection *conn) {
+    conn->flags &= ~CONN_FLAG_IN_HANDLER;
+    if (!(conn->flags & CONN_FLAG_CLOSE_SCHEDULED)) return 1;
+
+    connClose(conn);
+    return 0;
+}
+
+/* Run a connection handler inside a handler context:
+ * 1. enterHandler() flags the connection as being in a handler.
+ * 2. The handler, when not NULL, is executed.
+ * 3. exitHandler() clears the flag and performs the deferred close if one
+ *    was scheduled while the handler ran.
+ *
+ * Returns 1 when no close was pending, 0 when the connection was closed
+ * on the way out.
+ */
+static inline int callHandler(connection *conn, ConnectionCallbackFunc handler) {
+    enterHandler(conn);
+    if (handler) handler(conn);
+    return exitHandler(conn);
+}
+
+#endif /* __REDIS_CONNHELPERS_H */
diff --git a/src/debug.c b/src/debug.c
index 29a244e24..179f6d2c9 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -369,6 +369,7 @@ void debugCommand(client *c) {
"SDSLEN <key> -- Show low level SDS string info representing key and value.",
"SEGFAULT -- Crash the server with sigsegv.",
"SET-ACTIVE-EXPIRE <0|1> -- Setting it to 0 disables expiring keys in background when they are not accessed (otherwise the Redis behavior). Setting it to 1 reenables back the default.",
+"AOF-FLUSH-SLEEP <microsec> -- Server will sleep before flushing the AOF, this is used for testing",
"SLEEP <seconds> -- Stop the server for <seconds>. Decimals allowed.",
"STRUCTSIZE -- Return the size of different Redis core C structures.",
"ZIPLIST <key> -- Show low level info about the ziplist encoding.",
@@ -649,6 +650,11 @@ NULL
{
server.active_expire_enabled = atoi(c->argv[2]->ptr);
addReply(c,shared.ok);
+ } else if (!strcasecmp(c->argv[1]->ptr,"aof-flush-sleep") &&
+ c->argc == 3)
+ {
+ server.aof_flush_sleep = atoi(c->argv[2]->ptr);
+ addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"lua-always-replicate-commands") &&
c->argc == 3)
{
@@ -762,11 +768,12 @@ void _serverAssert(const char *estr, const char *file, int line) {
void _serverAssertPrintClientInfo(const client *c) {
int j;
+ char conninfo[CONN_INFO_LEN];
bugReportStart();
serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
- serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long)c->flags);
- serverLog(LL_WARNING,"client->fd = %d", c->fd);
+ serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long) c->flags);
+ serverLog(LL_WARNING,"client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo)));
serverLog(LL_WARNING,"client->argc = %d", c->argc);
for (j=0; j < c->argc; j++) {
char buf[128];
diff --git a/src/module.c b/src/module.c
index 7e0a419b6..0d83817fa 100644
--- a/src/module.c
+++ b/src/module.c
@@ -2872,7 +2872,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch
/* Create the client and dispatch the command. */
va_start(ap, fmt);
- c = createClient(-1);
+ c = createClient(NULL);
c->user = NULL; /* Root user. */
argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
replicate = flags & REDISMODULE_ARGV_REPLICATE;
@@ -3836,7 +3836,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc
bc->disconnect_callback = NULL; /* Set by RM_SetDisconnectCallback() */
bc->free_privdata = free_privdata;
bc->privdata = NULL;
- bc->reply_client = createClient(-1);
+ bc->reply_client = createClient(NULL);
bc->reply_client->flags |= CLIENT_MODULE;
bc->dbid = c->db->id;
c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0;
@@ -4077,7 +4077,7 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) {
* access it safely from another thread, so we create a fake client here
* in order to keep things like the currently selected database and similar
* things. */
- ctx->client = createClient(-1);
+ ctx->client = createClient(NULL);
if (bc) {
selectDb(ctx->client,bc->dbid);
ctx->client->id = bc->client->id;
@@ -5552,7 +5552,7 @@ void moduleInitModulesSystem(void) {
/* Set up the keyspace notification susbscriber list and static client */
moduleKeyspaceSubscribers = listCreate();
- moduleFreeContextReusedClient = createClient(-1);
+ moduleFreeContextReusedClient = createClient(NULL);
moduleFreeContextReusedClient->flags |= CLIENT_MODULE;
moduleFreeContextReusedClient->user = NULL; /* root user. */
diff --git a/src/networking.c b/src/networking.c
index a959d557a..ddfe4d8e3 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -84,32 +84,27 @@ void linkClient(client *c) {
raxInsert(server.clients_index,(unsigned char*)&id,sizeof(id),c,NULL);
}
-client *createClient(int fd) {
+client *createClient(connection *conn) {
client *c = zmalloc(sizeof(client));
- /* passing -1 as fd it is possible to create a non connected client.
+ /* passing NULL as conn it is possible to create a non connected client.
* This is useful since all the commands needs to be executed
* in the context of a client. When commands are executed in other
* contexts (for instance a Lua script) we need a non connected client. */
- if (fd != -1) {
- anetNonBlock(NULL,fd);
- anetEnableTcpNoDelay(NULL,fd);
+ if (conn) {
+ connNonBlock(conn);
+ connEnableTcpNoDelay(conn);
if (server.tcpkeepalive)
- anetKeepAlive(NULL,fd,server.tcpkeepalive);
- if (aeCreateFileEvent(server.el,fd,AE_READABLE,
- readQueryFromClient, c) == AE_ERR)
- {
- close(fd);
- zfree(c);
- return NULL;
- }
+ connKeepAlive(conn,server.tcpkeepalive);
+ connSetReadHandler(conn, readQueryFromClient);
+ connSetPrivateData(conn, c);
}
selectDb(c,0);
uint64_t client_id = ++server.next_client_id;
c->id = client_id;
c->resp = 2;
- c->fd = fd;
+ c->conn = conn;
c->name = NULL;
c->bufpos = 0;
c->qb_pos = 0;
@@ -161,7 +156,7 @@ client *createClient(int fd) {
c->client_tracking_redirection = 0;
listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid);
listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
- if (fd != -1) linkClient(c);
+ if (conn) linkClient(c);
initClientMultiState(c);
return c;
}
@@ -227,7 +222,7 @@ int prepareClientToWrite(client *c) {
if ((c->flags & CLIENT_MASTER) &&
!(c->flags & CLIENT_MASTER_FORCE_REPLY)) return C_ERR;
- if (c->fd <= 0) return C_ERR; /* Fake client for AOF loading. */
+ if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
/* Schedule the client to write the output buffers to the socket, unless
* it should already be setup to do so (it has already pending data). */
@@ -777,28 +772,13 @@ int clientHasPendingReplies(client *c) {
return c->bufpos || listLength(c->reply);
}
-#define MAX_ACCEPTS_PER_CALL 1000
-static void acceptCommonHandler(int fd, int flags, char *ip) {
- client *c;
- if ((c = createClient(fd)) == NULL) {
- serverLog(LL_WARNING,
- "Error registering fd event for the new client: %s (fd=%d)",
- strerror(errno),fd);
- close(fd); /* May be already closed, just ignore errors */
- return;
- }
- /* If maxclient directive is set and this is one client more... close the
- * connection. Note that we create the client instead to check before
- * for this condition, since now the socket is already set in non-blocking
- * mode and we can send an error for free using the Kernel I/O */
- if (listLength(server.clients) > server.maxclients) {
- char *err = "-ERR max number of clients reached\r\n";
+void clientAcceptHandler(connection *conn) {
+ client *c = connGetPrivateData(conn);
- /* That's a best effort error message, don't check write errors */
- if (write(c->fd,err,strlen(err)) == -1) {
- /* Nothing to do, Just to avoid the warning... */
- }
- server.stat_rejected_conn++;
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
+ serverLog(LL_WARNING,
+ "Error accepting a client connection: %s",
+ connGetLastError(conn));
freeClient(c);
return;
}
@@ -810,10 +790,12 @@ static void acceptCommonHandler(int fd, int flags, char *ip) {
if (server.protected_mode &&
server.bindaddr_count == 0 &&
DefaultUser->flags & USER_FLAG_NOPASS &&
- !(flags & CLIENT_UNIX_SOCKET) &&
- ip != NULL)
+ !(c->flags & CLIENT_UNIX_SOCKET))
{
- if (strcmp(ip,"127.0.0.1") && strcmp(ip,"::1")) {
+ char cip[NET_IP_STR_LEN+1] = { 0 };
+ connPeerToString(conn, cip, sizeof(cip)-1, NULL);
+
+ if (strcmp(cip,"127.0.0.1") && strcmp(cip,"::1")) {
char *err =
"-DENIED Redis is running in protected mode because protected "
"mode is enabled, no bind address was specified, no "
@@ -835,7 +817,7 @@ static void acceptCommonHandler(int fd, int flags, char *ip) {
"4) Setup a bind address or an authentication password. "
"NOTE: You only need to do one of the above things in order for "
"the server to start accepting connections from the outside.\r\n";
- if (write(c->fd,err,strlen(err)) == -1) {
+ if (connWrite(c->conn,err,strlen(err)) == -1) {
/* Nothing to do, Just to avoid the warning... */
}
server.stat_rejected_conn++;
@@ -845,7 +827,63 @@ static void acceptCommonHandler(int fd, int flags, char *ip) {
}
server.stat_numconnections++;
+}
+
+
+#define MAX_ACCEPTS_PER_CALL 1000
+static void acceptCommonHandler(connection *conn, int flags, char *ip) {
+ client *c;
+ UNUSED(ip);
+
+ /* Admission control will happen before a client is created and connAccept()
+ * called, because we don't want to even start transport-level negotiation
+ * if rejected.
+ */
+ if (listLength(server.clients) >= server.maxclients) {
+ char *err = "-ERR max number of clients reached\r\n";
+
+ /* That's a best effort error message, don't check write errors.
+ * Note that for TLS connections, no handshake was done yet so nothing is written
+ * and the connection will just drop.
+ */
+ if (connWrite(conn,err,strlen(err)) == -1) {
+ /* Nothing to do, Just to avoid the warning... */
+ }
+ server.stat_rejected_conn++;
+ connClose(conn);
+ return;
+ }
+
+ /* Create connection and client */
+ if ((c = createClient(conn)) == NULL) {
+ char conninfo[100];
+ serverLog(LL_WARNING,
+ "Error registering fd event for the new client: %s (conn: %s)",
+ connGetLastError(conn),
+ connGetInfo(conn, conninfo, sizeof(conninfo)));
+ connClose(conn); /* May be already closed, just ignore errors */
+ return;
+ }
+
+ /* Last chance to keep flags */
c->flags |= flags;
+
+ /* Initiate accept.
+ *
+ * Note that connAccept() is free to do two things here:
+ * 1. Call clientAcceptHandler() immediately;
+ * 2. Schedule a future call to clientAcceptHandler().
+ *
+ * Because of that, we must do nothing else afterwards.
+ */
+ if (connAccept(conn, clientAcceptHandler) == C_ERR) {
+ char conninfo[100];
+ serverLog(LL_WARNING,
+ "Error accepting a client connection: %s (conn: %s)",
+ connGetLastError(conn), connGetInfo(conn, conninfo, sizeof(conninfo)));
+ freeClient(connGetPrivateData(conn));
+ return;
+ }
}
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
@@ -864,7 +902,27 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
}
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
- acceptCommonHandler(cfd,0,cip);
+ acceptCommonHandler(connCreateAcceptedSocket(cfd),0,cip);
+ }
+}
+
+void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
+ int cport, cfd, max = MAX_ACCEPTS_PER_CALL;
+ char cip[NET_IP_STR_LEN];
+ UNUSED(el);
+ UNUSED(mask);
+ UNUSED(privdata);
+
+ while(max--) {
+ cfd = anetTcpAccept(server.neterr, fd, cip, sizeof(cip), &cport);
+ if (cfd == ANET_ERR) {
+ if (errno != EWOULDBLOCK)
+ serverLog(LL_WARNING,
+ "Accepting client connection: %s", server.neterr);
+ return;
+ }
+ serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
+ acceptCommonHandler(connCreateAcceptedTLS(cfd, server.tls_auth_clients),0,cip);
}
}
@@ -883,7 +941,7 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
}
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
- acceptCommonHandler(cfd,CLIENT_UNIX_SOCKET,NULL);
+ acceptCommonHandler(connCreateAcceptedSocket(cfd),CLIENT_UNIX_SOCKET,NULL);
}
}
@@ -914,10 +972,10 @@ void unlinkClient(client *c) {
/* If this is marked as current client unset it. */
if (server.current_client == c) server.current_client = NULL;
- /* Certain operations must be done only if the client has an active socket.
+ /* Certain operations must be done only if the client has an active connection.
* If the client was already unlinked or if it's a "fake client" the
- * fd is already set to -1. */
- if (c->fd != -1) {
+ * conn is already set to NULL. */
+ if (c->conn) {
/* Remove from the list of active clients. */
if (c->client_list_node) {
uint64_t id = htonu64(c->id);
@@ -926,21 +984,23 @@ void unlinkClient(client *c) {
c->client_list_node = NULL;
}
- /* In the case of diskless replication the fork is writing to the
- * sockets and just closing the fd isn't enough, if we don't also
- * shutdown the socket the fork will continue to write to the slave
- * and the salve will only find out that it was disconnected when
- * it will finish reading the rdb. */
- if ((c->flags & CLIENT_SLAVE) &&
- (c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)) {
- shutdown(c->fd, SHUT_RDWR);
+ /* Check if this is a replica waiting for diskless replication (rdb pipe),
+ * in which case it needs to be cleaned from that list */
+ if (c->flags & CLIENT_SLAVE &&
+ c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
+ server.rdb_pipe_conns)
+ {
+ int i;
+ for (i=0; i < server.rdb_pipe_numconns; i++) {
+ if (server.rdb_pipe_conns[i] == c->conn) {
+ rdbPipeWriteHandlerConnRemoved(c->conn);
+ server.rdb_pipe_conns[i] = NULL;
+ break;
+ }
+ }
}
-
- /* Unregister async I/O handlers and close the socket. */
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
- close(c->fd);
- c->fd = -1;
+ connClose(c->conn);
+ c->conn = NULL;
}
/* Remove from the list of pending writes if needed. */
@@ -1112,19 +1172,20 @@ client *lookupClientByID(uint64_t id) {
/* Write data in output buffers to client. Return C_OK if the client
* is still valid after the call, C_ERR if it was freed because of some
- * error.
+ * error. If handler_installed is set, it will attempt to clear the
+ * write event.
*
* This function is called by threads, but always with handler_installed
* set to 0. So when handler_installed is set to 0 the function must be
* thread safe. */
-int writeToClient(int fd, client *c, int handler_installed) {
+int writeToClient(client *c, int handler_installed) {
ssize_t nwritten = 0, totwritten = 0;
size_t objlen;
clientReplyBlock *o;
while(clientHasPendingReplies(c)) {
if (c->bufpos > 0) {
- nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
+ nwritten = connWrite(c->conn,c->buf+c->sentlen,c->bufpos-c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
@@ -1145,7 +1206,7 @@ int writeToClient(int fd, client *c, int handler_installed) {
continue;
}
- nwritten = write(fd, o->buf + c->sentlen, objlen - c->sentlen);
+ nwritten = connWrite(c->conn, o->buf + c->sentlen, objlen - c->sentlen);
if (nwritten <= 0) break;
c->sentlen += nwritten;
totwritten += nwritten;
@@ -1180,11 +1241,11 @@ int writeToClient(int fd, client *c, int handler_installed) {
}
server.stat_net_output_bytes += totwritten;
if (nwritten == -1) {
- if (errno == EAGAIN) {
+ if (connGetState(c->conn) == CONN_STATE_CONNECTED) {
nwritten = 0;
} else {
serverLog(LL_VERBOSE,
- "Error writing to client: %s", strerror(errno));
+ "Error writing to client: %s", connGetLastError(c->conn));
freeClientAsync(c);
return C_ERR;
}
@@ -1202,7 +1263,7 @@ int writeToClient(int fd, client *c, int handler_installed) {
* adDeleteFileEvent() is not thread safe: however writeToClient()
* is always called with handler_installed set to 0 from threads
* so we are fine. */
- if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ if (handler_installed) connSetWriteHandler(c->conn, NULL);
/* Close connection after entire reply has been sent. */
if (c->flags & CLIENT_CLOSE_AFTER_REPLY) {
@@ -1214,10 +1275,9 @@ int writeToClient(int fd, client *c, int handler_installed) {
}
/* Write event handler. Just send data to the client. */
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- UNUSED(el);
- UNUSED(mask);
- writeToClient(fd,privdata,1);
+void sendReplyToClient(connection *conn) {
+ client *c = connGetPrivateData(conn);
+ writeToClient(c,1);
}
/* This function is called just before entering the event loop, in the hope
@@ -1240,26 +1300,24 @@ int handleClientsWithPendingWrites(void) {
if (c->flags & CLIENT_PROTECTED) continue;
/* Try to write buffers to the client socket. */
- if (writeToClient(c->fd,c,0) == C_ERR) continue;
+ if (writeToClient(c,0) == C_ERR) continue;
/* If after the synchronous writes above we still have data to
* output to the client, we need to install the writable handler. */
if (clientHasPendingReplies(c)) {
- int ae_flags = AE_WRITABLE;
+ int ae_barrier = 0;
/* For the fsync=always policy, we want that a given FD is never
* served for reading and writing in the same event loop iteration,
* so that in the middle of receiving the query, and serving it
* to the client, we'll call beforeSleep() that will do the
- * actual fsync of AOF to disk. AE_BARRIER ensures that. */
+ * actual fsync of AOF to disk. the write barrier ensures that. */
if (server.aof_state == AOF_ON &&
server.aof_fsync == AOF_FSYNC_ALWAYS)
{
- ae_flags |= AE_BARRIER;
+ ae_barrier = 1;
}
- if (aeCreateFileEvent(server.el, c->fd, ae_flags,
- sendReplyToClient, c) == AE_ERR)
- {
- freeClientAsync(c);
+ if (connSetWriteHandlerWithBarrier(c->conn, sendReplyToClient, ae_barrier) == C_ERR) {
+ freeClientAsync(c);
}
}
}
@@ -1305,15 +1363,15 @@ void resetClient(client *c) {
* path, it is not really released, but only marked for later release. */
void protectClient(client *c) {
c->flags |= CLIENT_PROTECTED;
- aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
- aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
+ connSetReadHandler(c->conn,NULL);
+ connSetWriteHandler(c->conn,NULL);
}
/* This will undo the client protection done by protectClient() */
void unprotectClient(client *c) {
if (c->flags & CLIENT_PROTECTED) {
c->flags &= ~CLIENT_PROTECTED;
- aeCreateFileEvent(server.el,c->fd,AE_READABLE,readQueryFromClient,c);
+ connSetReadHandler(c->conn,readQueryFromClient);
if (clientHasPendingReplies(c)) clientInstallWriteHandler(c);
}
}
@@ -1710,12 +1768,10 @@ void processInputBufferAndReplicate(client *c) {
}
}
-void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
- client *c = (client*) privdata;
+void readQueryFromClient(connection *conn) {
+ client *c = connGetPrivateData(conn);
int nread, readlen;
size_t qblen;
- UNUSED(el);
- UNUSED(mask);
/* Check if we want to read from the client later when exiting from
* the event loop. This is the case if threaded I/O is enabled. */
@@ -1741,12 +1797,12 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
qblen = sdslen(c->querybuf);
if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
c->querybuf = sdsMakeRoomFor(c->querybuf, readlen);
- nread = read(fd, c->querybuf+qblen, readlen);
+ nread = connRead(c->conn, c->querybuf+qblen, readlen);
if (nread == -1) {
- if (errno == EAGAIN) {
+ if (connGetState(conn) == CONN_STATE_CONNECTED) {
return;
} else {
- serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno));
+ serverLog(LL_VERBOSE, "Reading from client: %s",connGetLastError(c->conn));
freeClientAsync(c);
return;
}
@@ -1818,7 +1874,7 @@ void genClientPeerId(client *client, char *peerid,
snprintf(peerid,peerid_len,"%s:0",server.unixsocket);
} else {
/* TCP client. */
- anetFormatPeer(client->fd,peerid,peerid_len);
+ connFormatPeer(client->conn,peerid,peerid_len);
}
}
@@ -1839,8 +1895,7 @@ char *getClientPeerId(client *c) {
/* Concatenate a string representing the state of a client in an human
* readable format, into the sds string 's'. */
sds catClientInfoString(sds s, client *client) {
- char flags[16], events[3], *p;
- int emask;
+ char flags[16], events[3], conninfo[CONN_INFO_LEN], *p;
p = flags;
if (client->flags & CLIENT_SLAVE) {
@@ -1864,16 +1919,17 @@ sds catClientInfoString(sds s, client *client) {
if (p == flags) *p++ = 'N';
*p++ = '\0';
- emask = client->fd == -1 ? 0 : aeGetFileEvents(server.el,client->fd);
p = events;
- if (emask & AE_READABLE) *p++ = 'r';
- if (emask & AE_WRITABLE) *p++ = 'w';
+ if (client->conn) {
+ if (connHasReadHandler(client->conn)) *p++ = 'r';
+ if (connHasWriteHandler(client->conn)) *p++ = 'w';
+ }
*p = '\0';
return sdscatfmt(s,
- "id=%U addr=%s fd=%i name=%s age=%I idle=%I flags=%s db=%i sub=%i psub=%i multi=%i qbuf=%U qbuf-free=%U obl=%U oll=%U omem=%U events=%s cmd=%s user=%s",
+ "id=%U addr=%s %s name=%s age=%I idle=%I flags=%s db=%i sub=%i psub=%i multi=%i qbuf=%U qbuf-free=%U obl=%U oll=%U omem=%U events=%s cmd=%s user=%s",
(unsigned long long) client->id,
getClientPeerId(client),
- client->fd,
+ connGetInfo(client->conn, conninfo, sizeof(conninfo)),
client->name ? (char*)client->name->ptr : "",
(long long)(server.unixtime - client->ctime),
(long long)(server.unixtime - client->lastinteraction),
@@ -2445,7 +2501,7 @@ int checkClientOutputBufferLimits(client *c) {
* called from contexts where the client can't be freed safely, i.e. from the
* lower level functions pushing data inside the client output buffers. */
void asyncCloseClientOnOutputBufferLimitReached(client *c) {
- if (c->fd == -1) return; /* It is unsafe to free fake clients. */
+ if (!c->conn) return; /* It is unsafe to free fake clients. */
serverAssert(c->reply_bytes < SIZE_MAX-(1024*64));
if (c->reply_bytes == 0 || c->flags & CLIENT_CLOSE_ASAP) return;
if (checkClientOutputBufferLimits(c)) {
@@ -2468,8 +2524,7 @@ void flushSlavesOutputBuffers(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = listNodeValue(ln);
- int events = aeGetFileEvents(server.el,slave->fd);
- int can_receive_writes = (events & AE_WRITABLE) ||
+ int can_receive_writes = connHasWriteHandler(slave->conn) ||
(slave->flags & CLIENT_PENDING_WRITE);
/* We don't want to send the pending data to the replica in a few
@@ -2491,7 +2546,7 @@ void flushSlavesOutputBuffers(void) {
!slave->repl_put_online_on_ack &&
clientHasPendingReplies(slave))
{
- writeToClient(slave->fd,slave,0);
+ writeToClient(slave,0);
}
}
}
@@ -2618,9 +2673,9 @@ void *IOThreadMain(void *myid) {
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
if (io_threads_op == IO_THREADS_OP_WRITE) {
- writeToClient(c->fd,c,0);
+ writeToClient(c,0);
} else if (io_threads_op == IO_THREADS_OP_READ) {
- readQueryFromClient(NULL,c->fd,c,0);
+ readQueryFromClient(c->conn);
} else {
serverPanic("io_threads_op value is unknown");
}
@@ -2761,8 +2816,7 @@ int handleClientsWithPendingWritesUsingThreads(void) {
/* Install the write handler if there are pending writes in some
* of the clients. */
if (clientHasPendingReplies(c) &&
- aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
- sendReplyToClient, c) == AE_ERR)
+ connSetWriteHandler(c->conn, sendReplyToClient) == AE_ERR)
{
freeClientAsync(c);
}
diff --git a/src/rdb.c b/src/rdb.c
index e430bcd58..2406ea88a 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2195,6 +2195,8 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) {
* own reference. */
decrRefCount(key);
}
+ if (server.key_load_delay)
+ usleep(server.key_load_delay);
/* Reset the state that is key-specified and is populated by
* opcodes before the key, so that we start from scratch again. */
@@ -2290,8 +2292,6 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
* This function covers the case of RDB -> Salves socket transfers for
* diskless replication. */
void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
- uint64_t *ok_slaves;
-
if (!bysignal && exitcode == 0) {
serverLog(LL_NOTICE,
"Background RDB transfer terminated with success");
@@ -2305,79 +2305,6 @@ void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
server.rdb_save_time_start = -1;
- /* If the child returns an OK exit code, read the set of slave client
- * IDs and the associated status code. We'll terminate all the slaves
- * in error state.
- *
- * If the process returned an error, consider the list of slaves that
- * can continue to be empty, so that it's just a special case of the
- * normal code path. */
- ok_slaves = zmalloc(sizeof(uint64_t)); /* Make space for the count. */
- ok_slaves[0] = 0;
- if (!bysignal && exitcode == 0) {
- int readlen = sizeof(uint64_t);
-
- if (read(server.rdb_pipe_read_result_from_child, ok_slaves, readlen) ==
- readlen)
- {
- readlen = ok_slaves[0]*sizeof(uint64_t)*2;
-
- /* Make space for enough elements as specified by the first
- * uint64_t element in the array. */
- ok_slaves = zrealloc(ok_slaves,sizeof(uint64_t)+readlen);
- if (readlen &&
- read(server.rdb_pipe_read_result_from_child, ok_slaves+1,
- readlen) != readlen)
- {
- ok_slaves[0] = 0;
- }
- }
- }
-
- close(server.rdb_pipe_read_result_from_child);
- close(server.rdb_pipe_write_result_to_parent);
-
- /* We can continue the replication process with all the slaves that
- * correctly received the full payload. Others are terminated. */
- listNode *ln;
- listIter li;
-
- listRewind(server.slaves,&li);
- while((ln = listNext(&li))) {
- client *slave = ln->value;
-
- if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
- uint64_t j;
- int errorcode = 0;
-
- /* Search for the slave ID in the reply. In order for a slave to
- * continue the replication process, we need to find it in the list,
- * and it must have an error code set to 0 (which means success). */
- for (j = 0; j < ok_slaves[0]; j++) {
- if (slave->id == ok_slaves[2*j+1]) {
- errorcode = ok_slaves[2*j+2];
- break; /* Found in slaves list. */
- }
- }
- if (j == ok_slaves[0] || errorcode != 0) {
- serverLog(LL_WARNING,
- "Closing slave %s: child->slave RDB transfer failed: %s",
- replicationGetSlaveName(slave),
- (errorcode == 0) ? "RDB transfer child aborted"
- : strerror(errorcode));
- freeClient(slave);
- } else {
- serverLog(LL_WARNING,
- "Slave %s correctly received the streamed RDB file.",
- replicationGetSlaveName(slave));
- /* Restore the socket as non-blocking. */
- anetNonBlock(NULL,slave->fd);
- anetSendTimeout(NULL,slave->fd,0);
- }
- }
- }
- zfree(ok_slaves);
-
updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, RDB_CHILD_TYPE_SOCKET);
}
@@ -2409,9 +2336,6 @@ void killRDBChild(void) {
/* Spawn an RDB child that writes the RDB to the sockets of the slaves
* that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
- int *fds;
- uint64_t *clientids;
- int numfds;
listNode *ln;
listIter li;
pid_t childpid;
@@ -2419,35 +2343,30 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
if (hasActiveChildProcess()) return C_ERR;
- /* Before to fork, create a pipe that will be used in order to
- * send back to the parent the IDs of the slaves that successfully
- * received all the writes. */
+ /* Even if the previous fork child exited, don't start a new one until we
+ * drained the pipe. */
+ if (server.rdb_pipe_conns) return C_ERR;
+
+ /* Before forking, create a pipe that is used to transfer the rdb bytes to
+ * the parent. We can't let the child write directly to the sockets, since
+ * in case of TLS the parent must keep a continuous TLS state when the
+ * child terminates and the parent takes over. */
if (pipe(pipefds) == -1) return C_ERR;
- server.rdb_pipe_read_result_from_child = pipefds[0];
- server.rdb_pipe_write_result_to_parent = pipefds[1];
+ server.rdb_pipe_read = pipefds[0];
+ server.rdb_pipe_write = pipefds[1];
+ anetNonBlock(NULL, server.rdb_pipe_read);
- /* Collect the file descriptors of the slaves we want to transfer
+ /* Collect the connections of the replicas we want to transfer
* the RDB to, which are i WAIT_BGSAVE_START state. */
- fds = zmalloc(sizeof(int)*listLength(server.slaves));
- /* We also allocate an array of corresponding client IDs. This will
- * be useful for the child process in order to build the report
- * (sent via unix pipe) that will be sent to the parent. */
- clientids = zmalloc(sizeof(uint64_t)*listLength(server.slaves));
- numfds = 0;
-
+ server.rdb_pipe_conns = zmalloc(sizeof(connection *)*listLength(server.slaves));
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
-
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
- clientids[numfds] = slave->id;
- fds[numfds++] = slave->fd;
+ server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn;
replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
- /* Put the socket in blocking mode to simplify RDB transfer.
- * We'll restore it when the children returns (since duped socket
- * will share the O_NONBLOCK attribute with the parent). */
- anetBlock(NULL,slave->fd);
- anetSendTimeout(NULL,slave->fd,server.repl_timeout*1000);
}
}
@@ -2456,61 +2375,22 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
if ((childpid = redisFork()) == 0) {
/* Child */
int retval;
- rio slave_sockets;
+ rio rdb;
- rioInitWithFdset(&slave_sockets,fds,numfds);
- zfree(fds);
+ rioInitWithFd(&rdb,server.rdb_pipe_write);
redisSetProcTitle("redis-rdb-to-slaves");
- retval = rdbSaveRioWithEOFMark(&slave_sockets,NULL,rsi);
- if (retval == C_OK && rioFlush(&slave_sockets) == 0)
+ retval = rdbSaveRioWithEOFMark(&rdb,NULL,rsi);
+ if (retval == C_OK && rioFlush(&rdb) == 0)
retval = C_ERR;
if (retval == C_OK) {
sendChildCOWInfo(CHILD_INFO_TYPE_RDB, "RDB");
-
- /* If we are returning OK, at least one slave was served
- * with the RDB file as expected, so we need to send a report
- * to the parent via the pipe. The format of the message is:
- *
- * <len> <slave[0].id> <slave[0].error> ...
- *
- * len, slave IDs, and slave errors, are all uint64_t integers,
- * so basically the reply is composed of 64 bits for the len field
- * plus 2 additional 64 bit integers for each entry, for a total
- * of 'len' entries.
- *
- * The 'id' represents the slave's client ID, so that the master
- * can match the report with a specific slave, and 'error' is
- * set to 0 if the replication process terminated with a success
- * or the error code if an error occurred. */
- void *msg = zmalloc(sizeof(uint64_t)*(1+2*numfds));
- uint64_t *len = msg;
- uint64_t *ids = len+1;
- int j, msglen;
-
- *len = numfds;
- for (j = 0; j < numfds; j++) {
- *ids++ = clientids[j];
- *ids++ = slave_sockets.io.fdset.state[j];
- }
-
- /* Write the message to the parent. If we have no good slaves or
- * we are unable to transfer the message to the parent, we exit
- * with an error so that the parent will abort the replication
- * process with all the childre that were waiting. */
- msglen = sizeof(uint64_t)*(1+2*numfds);
- if (*len == 0 ||
- write(server.rdb_pipe_write_result_to_parent,msg,msglen)
- != msglen)
- {
- retval = C_ERR;
- }
- zfree(msg);
}
- zfree(clientids);
- rioFreeFdset(&slave_sockets);
+
+ rioFreeFd(&rdb);
+ close(server.rdb_pipe_write); /* wake up the reader, tell it we're done. */
exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
@@ -2524,17 +2404,16 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
- int j;
-
- for (j = 0; j < numfds; j++) {
- if (slave->id == clientids[j]) {
- slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
- break;
- }
+ if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
+ slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
}
}
- close(pipefds[0]);
- close(pipefds[1]);
+ close(server.rdb_pipe_write);
+ close(server.rdb_pipe_read);
+ zfree(server.rdb_pipe_conns);
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
closeChildInfoPipe();
} else {
serverLog(LL_NOTICE,"Background RDB transfer started by pid %d",
@@ -2542,9 +2421,11 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
server.rdb_save_time_start = time(NULL);
server.rdb_child_pid = childpid;
server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
+ close(server.rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */
+ if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
+ serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
+ }
}
- zfree(clientids);
- zfree(fds);
return (childpid == -1) ? C_ERR : C_OK;
}
return C_OK; /* Unreached. */
diff --git a/src/redis-cli.c b/src/redis-cli.c
index 2830273bb..6d07f7ba6 100644
--- a/src/redis-cli.c
+++ b/src/redis-cli.c
@@ -47,6 +47,10 @@
#include <math.h>
#include <hiredis.h>
+#ifdef USE_OPENSSL
+#include <openssl/ssl.h>
+#include <hiredis_ssl.h>
+#endif
#include <sds.h> /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */
#include "dict.h"
#include "adlist.h"
@@ -188,6 +192,12 @@ static struct config {
char *hostip;
int hostport;
char *hostsocket;
+ int tls;
+ char *sni;
+ char *cacert;
+ char *cacertdir;
+ char *cert;
+ char *key;
long repeat;
long interval;
int dbnum;
@@ -758,6 +768,71 @@ static int cliSelect(void) {
return REDIS_ERR;
}
+/* Upgrade an already connected hiredis context to TLS. This is a wrapper
+ * around redisInitiateSSL() so that nothing here depends on hiredis_ssl when
+ * building without TLS support.
+ *
+ * When a failure is detected by this function, REDIS_ERR is returned and
+ * '*err' is set to a static message. Otherwise the return value of
+ * redisInitiateSSL() is handed back as is and '*err' is left untouched.
+ * Without USE_OPENSSL this is a no-op returning REDIS_OK. */
+static int cliSecureConnection(redisContext *c, const char **err) {
+#ifndef USE_OPENSSL
+    (void) c;
+    (void) err;
+    return REDIS_OK;
+#else
+    /* Created lazily on the first call and reused by later calls. */
+    static SSL_CTX *ssl_ctx = NULL;
+
+    if (ssl_ctx == NULL) {
+        const char *problem = NULL;
+        SSL_CTX *fresh = SSL_CTX_new(SSLv23_client_method());
+
+        if (fresh == NULL) {
+            problem = "Failed to create SSL_CTX";
+        } else {
+            SSL_CTX_set_options(fresh, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
+            SSL_CTX_set_verify(fresh, SSL_VERIFY_PEER, NULL);
+
+            /* Trust store: explicit file/directory if given, otherwise the
+             * default verify paths. */
+            if (config.cacert || config.cacertdir) {
+                if (!SSL_CTX_load_verify_locations(fresh, config.cacert, config.cacertdir))
+                    problem = "Invalid CA Certificate File/Directory";
+            } else if (!SSL_CTX_set_default_verify_paths(fresh)) {
+                problem = "Failed to use default CA paths";
+            }
+
+            /* Optional client certificate and private key. */
+            if (!problem && config.cert &&
+                !SSL_CTX_use_certificate_chain_file(fresh, config.cert))
+                problem = "Invalid client certificate";
+
+            if (!problem && config.key &&
+                !SSL_CTX_use_PrivateKey_file(fresh, config.key, SSL_FILETYPE_PEM))
+                problem = "Invalid private key";
+        }
+
+        if (problem != NULL) {
+            /* Nothing is cached on failure, so the next call starts over. */
+            *err = problem;
+            SSL_CTX_free(fresh);
+            return REDIS_ERR;
+        }
+        ssl_ctx = fresh;
+    }
+
+    SSL *ssl = SSL_new(ssl_ctx);
+    if (ssl == NULL) {
+        *err = "Failed to create SSL object";
+        return REDIS_ERR;
+    }
+
+    if (config.sni && !SSL_set_tlsext_host_name(ssl, config.sni)) {
+        *err = "Failed to configure SNI";
+        SSL_free(ssl);
+        return REDIS_ERR;
+    }
+
+    return redisInitiateSSL(c, ssl);
+#endif
+}
+
/* Select RESP3 mode if redis-cli was started with the -3 option. */
static int cliSwitchProto(void) {
redisReply *reply;
@@ -789,6 +864,16 @@ static int cliConnect(int flags) {
context = redisConnectUnix(config.hostsocket);
}
+    /* If the plain connection succeeded and TLS was requested, negotiate it
+     * now. A failure reported through 'err' aborts the connection attempt. */
+    if (!context->err && config.tls) {
+        const char *err = NULL;
+        if (cliSecureConnection(context, &err) == REDIS_ERR && err) {
+            fprintf(stderr, "Could not negotiate a TLS connection: %s\n", err);
+            /* Free the context before clearing the pointer: clearing it
+             * first would pass NULL to redisFree() and leak the context. */
+            redisFree(context);
+            context = NULL;
+            return REDIS_ERR;
+        }
+    }
+
if (context->err) {
if (!(flags & CC_QUIET)) {
fprintf(stderr,"Could not connect to Redis at ");
@@ -804,6 +889,7 @@ static int cliConnect(int flags) {
return REDIS_ERR;
}
+
/* Set aggressive KEEP_ALIVE socket option in the Redis context socket
* in order to prevent timeouts caused by the execution of long
* commands. At the same time this improves the detection of real
@@ -1305,6 +1391,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ..
redisFree(c);
c = redisConnect(config.hostip,config.hostport);
+ if (!c->err && config.tls) {
+ const char *err = NULL;
+ if (cliSecureConnection(c, &err) == REDIS_ERR && err) {
+ fprintf(stderr, "TLS Error: %s\n", err);
+ exit(1);
+ }
+ }
usleep(1000000);
}
@@ -1498,6 +1591,20 @@ static int parseOptions(int argc, char **argv) {
} else if (!strcmp(argv[i],"--cluster-search-multiple-owners")) {
config.cluster_manager_command.flags |=
CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS;
+#ifdef USE_OPENSSL
+ } else if (!strcmp(argv[i],"--tls")) {
+ config.tls = 1;
+ } else if (!strcmp(argv[i],"--sni")) {
+ config.sni = argv[++i];
+ } else if (!strcmp(argv[i],"--cacertdir")) {
+ config.cacertdir = argv[++i];
+ } else if (!strcmp(argv[i],"--cacert")) {
+ config.cacert = argv[++i];
+ } else if (!strcmp(argv[i],"--cert")) {
+ config.cert = argv[++i];
+ } else if (!strcmp(argv[i],"--key")) {
+ config.key = argv[++i];
+#endif
} else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) {
sds version = cliVersion();
printf("redis-cli %s\n", version);
@@ -1591,6 +1698,15 @@ static void usage(void) {
" -x Read last argument from STDIN.\n"
" -d <delimiter> Multi-bulk delimiter in for raw formatting (default: \\n).\n"
" -c Enable cluster mode (follow -ASK and -MOVED redirections).\n"
+#ifdef USE_OPENSSL
+" --tls Establish a secure TLS connection.\n"
+" --cacert CA Certificate file to verify with.\n"
+" --cacertdir Directory where trusted CA certificates are stored.\n"
+" If neither cacert nor cacertdir are specified, the default\n"
+" system-wide trusted root certs configuration will apply.\n"
+" --cert Client certificate to authenticate with.\n"
+" --key Private key file to authenticate with.\n"
+#endif
" --raw Use raw formatting for replies (default when STDOUT is\n"
" not a tty).\n"
" --no-raw Force formatted output even when STDOUT is not a tty.\n"
@@ -1615,7 +1731,9 @@ static void usage(void) {
" --pipe Transfer raw Redis protocol from stdin to server.\n"
" --pipe-timeout <n> In --pipe mode, abort with error if after sending all data.\n"
" no reply is received within <n> seconds.\n"
-" Default timeout: %d. Use 0 to wait forever.\n"
+" Default timeout: %d. Use 0 to wait forever.\n",
+ REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
+ fprintf(stderr,
" --bigkeys Sample Redis keys looking for keys with many elements (complexity).\n"
" --memkeys Sample Redis keys looking for keys consuming a lot of memory.\n"
" --memkeys-samples <n> Sample Redis keys looking for keys consuming a lot of memory.\n"
@@ -1638,8 +1756,7 @@ static void usage(void) {
" line interface.\n"
" --help Output this help and exit.\n"
" --version Output version and exit.\n"
-"\n",
- REDIS_CLI_DEFAULT_PIPE_TIMEOUT);
+"\n");
/* Using another fprintf call to avoid -Woverlength-strings compile warning */
fprintf(stderr,
"Cluster Manager Commands:\n"
@@ -2407,6 +2524,15 @@ cleanup:
static int clusterManagerNodeConnect(clusterManagerNode *node) {
if (node->context) redisFree(node->context);
node->context = redisConnect(node->ip, node->port);
+ if (!node->context->err && config.tls) {
+ const char *err = NULL;
+ if (cliSecureConnection(node->context, &err) == REDIS_ERR && err) {
+ fprintf(stderr,"TLS Error: %s\n", err);
+ redisFree(node->context);
+ node->context = NULL;
+ return 0;
+ }
+ }
if (node->context->err) {
fprintf(stderr,"Could not connect to Redis at ");
fprintf(stderr,"%s:%d: %s\n", node->ip, node->port,
diff --git a/src/replication.c b/src/replication.c
index 5519b9ce2..3c916c9a7 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -39,7 +39,7 @@
#include <sys/stat.h>
void replicationDiscardCachedMaster(void);
-void replicationResurrectCachedMaster(int newfd);
+void replicationResurrectCachedMaster(connection *conn);
void replicationSendAck(void);
void putSlaveOnline(client *slave);
int cancelReplicationHandshake(void);
@@ -57,7 +57,7 @@ char *replicationGetSlaveName(client *c) {
ip[0] = '\0';
buf[0] = '\0';
if (c->slave_ip[0] != '\0' ||
- anetPeerToString(c->fd,ip,sizeof(ip),NULL) != -1)
+ connPeerToString(c->conn,ip,sizeof(ip),NULL) != -1)
{
/* Note that the 'ip' buffer is always larger than 'c->slave_ip' */
if (c->slave_ip[0] != '\0') memcpy(ip,c->slave_ip,sizeof(c->slave_ip));
@@ -432,7 +432,7 @@ int replicationSetupSlaveForFullResync(client *slave, long long offset) {
if (!(slave->flags & CLIENT_PRE_PSYNC)) {
buflen = snprintf(buf,sizeof(buf),"+FULLRESYNC %s %lld\r\n",
server.replid,offset);
- if (write(slave->fd,buf,buflen) != buflen) {
+ if (connWrite(slave->conn,buf,buflen) != buflen) {
freeClientAsync(slave);
return C_ERR;
}
@@ -519,7 +519,7 @@ int masterTryPartialResynchronization(client *c) {
} else {
buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
}
- if (write(c->fd,buf,buflen) != buflen) {
+ if (connWrite(c->conn,buf,buflen) != buflen) {
freeClientAsync(c);
return C_OK;
}
@@ -685,7 +685,7 @@ void syncCommand(client *c) {
* paths will change the state if we handle the slave differently. */
c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
if (server.repl_disable_tcp_nodelay)
- anetDisableTcpNoDelay(NULL, c->fd); /* Non critical if it fails. */
+ connDisableTcpNoDelay(c->conn); /* Non critical if it fails. */
c->repldbfd = -1;
c->flags |= CLIENT_SLAVE;
listAddNodeTail(server.slaves,c);
@@ -862,8 +862,7 @@ void putSlaveOnline(client *slave) {
slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 0;
slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
- sendReplyToClient, slave) == AE_ERR) {
+ if (connSetWriteHandler(slave->conn, sendReplyToClient) == C_ERR) {
serverLog(LL_WARNING,"Unable to register writable event for replica bulk transfer: %s", strerror(errno));
freeClient(slave);
return;
@@ -873,10 +872,8 @@ void putSlaveOnline(client *slave) {
replicationGetSlaveName(slave));
}
-void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
- client *slave = privdata;
- UNUSED(el);
- UNUSED(mask);
+void sendBulkToSlave(connection *conn) {
+ client *slave = connGetPrivateData(conn);
char buf[PROTO_IOBUF_LEN];
ssize_t nwritten, buflen;
@@ -884,10 +881,10 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
* replication process. Currently the preamble is just the bulk count of
* the file in the form "$<length>\r\n". */
if (slave->replpreamble) {
- nwritten = write(fd,slave->replpreamble,sdslen(slave->replpreamble));
+ nwritten = connWrite(conn,slave->replpreamble,sdslen(slave->replpreamble));
if (nwritten == -1) {
serverLog(LL_VERBOSE,"Write error sending RDB preamble to replica: %s",
- strerror(errno));
+ connGetLastError(conn));
freeClient(slave);
return;
}
@@ -911,10 +908,10 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
freeClient(slave);
return;
}
- if ((nwritten = write(fd,buf,buflen)) == -1) {
- if (errno != EAGAIN) {
+ if ((nwritten = connWrite(conn,buf,buflen)) == -1) {
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
serverLog(LL_WARNING,"Write error sending DB to replica: %s",
- strerror(errno));
+ connGetLastError(conn));
freeClient(slave);
}
return;
@@ -924,11 +921,157 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
if (slave->repldboff == slave->repldbsize) {
close(slave->repldbfd);
slave->repldbfd = -1;
- aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
+ connSetWriteHandler(slave->conn,NULL);
putSlaveOnline(slave);
}
}
+/* Take 'conn' out of the set of connections waiting to become writable
+ * during an rdb pipe transfer. Does nothing if the connection has no write
+ * handler installed. When the last waiting connection is removed, the read
+ * event on the rdb pipe is installed again. */
+void rdbPipeWriteHandlerConnRemoved(struct connection *conn) {
+    if (!connHasWriteHandler(conn))
+        return;
+
+    connSetWriteHandler(conn, NULL);
+
+    /* Other connections are still flushing the current buffer (or this one
+     * hit a write error while others are pending): nothing more to do. */
+    if (--server.rdb_pipe_numconns_writing != 0)
+        return;
+
+    /* No more pending writers: go back to reading from the pipe. */
+    int rearmed = aeCreateFileEvent(server.el, server.rdb_pipe_read,
+                                    AE_READABLE, rdbPipeReadHandler, NULL);
+    if (rearmed == AE_ERR) {
+        serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
+    }
+}
+
+/* Write handler used in the diskless master while data from the rdb pipe is
+ * being transferred: called when a replica that could not take the whole
+ * buffer becomes writable again. It sends the part of server.rdb_pipe_buff
+ * starting at the replica's 'repldboff'. */
+void rdbPipeWriteHandler(struct connection *conn) {
+    serverAssert(server.rdb_pipe_bufflen>0);
+    client *replica = connGetPrivateData(conn);
+    int sent = connWrite(conn, server.rdb_pipe_buff + replica->repldboff,
+                         server.rdb_pipe_bufflen - replica->repldboff);
+
+    if (sent == -1) {
+        /* A failed write on a connection that is still in connected state
+         * is the equivalent of EAGAIN: just wait for the next event. */
+        if (connGetState(conn) != CONN_STATE_CONNECTED) {
+            serverLog(LL_WARNING,"Write error sending DB to replica: %s",
+                connGetLastError(conn));
+            freeClient(replica);
+        }
+        return;
+    }
+
+    replica->repldboff += sent;
+    server.stat_net_output_bytes += sent;
+
+    /* Once the whole buffer was delivered this connection stops waiting for
+     * writability; otherwise keep the handler for the remaining bytes. */
+    if (replica->repldboff >= server.rdb_pipe_bufflen)
+        rdbPipeWriteHandlerConnRemoved(conn);
+}
+
+/* Once the pipe serving the diskless rdb transfer is drained (its write end
+ * was closed), release all the temporary state of the transfer and clean up
+ * after the fork child. */
+void RdbPipeCleanup() {
+    close(server.rdb_pipe_read);
+
+    /* Drop the buffer used to relay the pipe content. */
+    zfree(server.rdb_pipe_buff);
+    server.rdb_pipe_buff = NULL;
+    server.rdb_pipe_bufflen = 0;
+
+    /* Forget the connections that were part of this transfer. */
+    zfree(server.rdb_pipe_conns);
+    server.rdb_pipe_conns = NULL;
+    server.rdb_pipe_numconns_writing = 0;
+    server.rdb_pipe_numconns = 0;
+
+    /* Detecting that the child exited is avoided for as long as the pipe is
+     * not drained, so this is the moment to check for it. */
+    checkChildrenDone();
+}
+
+/* Called in diskless master, when there's data to read from the child's rdb
+ * pipe.
+ *
+ * Each iteration reads up to PROTO_IOBUF_LEN bytes from 'fd' into
+ * server.rdb_pipe_buff and writes them to every connection still listed in
+ * server.rdb_pipe_conns. If some replica could not take the whole buffer, a
+ * write handler is installed on it and the pipe read event is removed until
+ * the pending writes are done. On EOF, or when the last replica is dropped,
+ * the read event is removed and the transfer state is cleaned up.
+ *
+ * 'eventLoop', 'clientData' and 'mask' are unused. */
+void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask) {
+    UNUSED(mask);
+    UNUSED(clientData);
+    UNUSED(eventLoop);
+    int i;
+    if (!server.rdb_pipe_buff)
+        server.rdb_pipe_buff = zmalloc(PROTO_IOBUF_LEN);
+    /* This handler must not run while replicas are still flushing the
+     * previous buffer. */
+    serverAssert(server.rdb_pipe_numconns_writing==0);
+
+    while (1) {
+        server.rdb_pipe_bufflen = read(fd, server.rdb_pipe_buff, PROTO_IOBUF_LEN);
+        if (server.rdb_pipe_bufflen < 0) {
+            /* Nothing more to read for now: wait for the next event. */
+            if (errno == EAGAIN || errno == EWOULDBLOCK)
+                return;
+            serverLog(LL_WARNING,"Diskless rdb transfer, read error sending DB to replicas: %s", strerror(errno));
+            /* Unrecoverable read error: drop every replica of this
+             * transfer and kill the child. */
+            for (i=0; i < server.rdb_pipe_numconns; i++) {
+                connection *conn = server.rdb_pipe_conns[i];
+                if (!conn)
+                    continue;
+                client *slave = connGetPrivateData(conn);
+                freeClient(slave);
+                server.rdb_pipe_conns[i] = NULL;
+            }
+            killRDBChild();
+            return;
+        }
+
+        if (server.rdb_pipe_bufflen == 0) {
+            /* EOF - write end was closed. */
+            int stillUp = 0;
+            aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
+            for (i=0; i < server.rdb_pipe_numconns; i++)
+            {
+                connection *conn = server.rdb_pipe_conns[i];
+                if (!conn)
+                    continue;
+                stillUp++;
+            }
+            serverLog(LL_WARNING,"Diskless rdb transfer, done reading from pipe, %d replicas still up.", stillUp);
+            RdbPipeCleanup();
+            return;
+        }
+
+        int stillAlive = 0;
+        for (i=0; i < server.rdb_pipe_numconns; i++)
+        {
+            int nwritten;
+            connection *conn = server.rdb_pipe_conns[i];
+            if (!conn)
+                continue;
+
+            client *slave = connGetPrivateData(conn);
+            if ((nwritten = connWrite(conn, server.rdb_pipe_buff, server.rdb_pipe_bufflen)) == -1) {
+                if (connGetState(conn) != CONN_STATE_CONNECTED) {
+                    serverLog(LL_WARNING,"Diskless rdb transfer, write error sending DB to replica: %s",
+                        connGetLastError(conn));
+                    freeClient(slave);
+                    server.rdb_pipe_conns[i] = NULL;
+                    continue;
+                }
+                /* An error and still in connected state, is equivalent to EAGAIN */
+                slave->repldboff = 0;
+            } else {
+                slave->repldboff = nwritten;
+                server.stat_net_output_bytes += nwritten;
+            }
+            /* If we were unable to write all the data to one of the replicas,
+             * setup write handler (and disable pipe read handler, below) */
+            if (nwritten != server.rdb_pipe_bufflen) {
+                server.rdb_pipe_numconns_writing++;
+                connSetWriteHandler(conn, rdbPipeWriteHandler);
+            }
+            stillAlive++;
+        }
+
+        if (stillAlive == 0) {
+            serverLog(LL_WARNING,"Diskless rdb transfer, last replica dropped, killing fork child.");
+            /* Unregister the read event before the cleanup, as the EOF path
+             * above does: RdbPipeCleanup() closes server.rdb_pipe_read and
+             * then calls checkChildrenDone(), so after it returns the
+             * descriptor no longer refers to this transfer's pipe.
+             * NOTE(review): checkChildrenDone() can presumably start a new
+             * transfer that reassigns server.rdb_pipe_read - confirm. */
+            aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
+            killRDBChild();
+            RdbPipeCleanup();
+            return;
+        }
+        /* Remove the pipe read handler if at least one write handler was set. */
+        if (server.rdb_pipe_numconns_writing) {
+            aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
+            break;
+        }
+    }
+}
+
/* This function is called at the end of every background saving,
* or when the replication RDB transfer strategy is modified from
* disk to socket or the other way around.
@@ -1015,8 +1158,8 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
(unsigned long long) slave->repldbsize);
- aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
- if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
+ connSetWriteHandler(slave->conn,NULL);
+ if (connSetWriteHandler(slave->conn,sendBulkToSlave) == C_ERR) {
freeClient(slave);
continue;
}
@@ -1084,9 +1227,8 @@ void replicationSendNewlineToMaster(void) {
static time_t newline_sent;
if (time(NULL) != newline_sent) {
newline_sent = time(NULL);
- if (write(server.repl_transfer_s,"\n",1) == -1) {
- /* Pinging back in this stage is best-effort. */
- }
+ /* Pinging back in this stage is best-effort. */
+ if (server.repl_transfer_s) connWrite(server.repl_transfer_s, "\n", 1);
}
}
@@ -1100,8 +1242,10 @@ void replicationEmptyDbCallback(void *privdata) {
/* Once we have a link with the master and the synchroniziation was
* performed, this function materializes the master client we store
* at server.master, starting from the specified file descriptor. */
-void replicationCreateMasterClient(int fd, int dbid) {
- server.master = createClient(fd);
+void replicationCreateMasterClient(connection *conn, int dbid) {
+ server.master = createClient(conn);
+ if (conn)
+ connSetReadHandler(server.master->conn, readQueryFromClient);
server.master->flags |= CLIENT_MASTER;
server.master->authenticated = 1;
server.master->reploff = server.master_initial_offset;
@@ -1196,7 +1340,7 @@ void disklessLoadRestoreBackups(redisDb *backup, int restore, int empty_db_flags
/* Asynchronously read the SYNC payload we receive from a master */
#define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */
-void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
+void readSyncBulkPayload(connection *conn) {
char buf[4096];
ssize_t nread, readlen, nwritten;
int use_diskless_load;
@@ -1204,9 +1348,6 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC :
EMPTYDB_NO_FLAGS;
off_t left;
- UNUSED(el);
- UNUSED(privdata);
- UNUSED(mask);
/* Static vars used to hold the EOF mark, and the last bytes received
* form the server: when they match, we reached the end of the transfer. */
@@ -1217,7 +1358,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
/* If repl_transfer_size == -1 we still have to read the bulk length
* from the master reply. */
if (server.repl_transfer_size == -1) {
- if (syncReadLine(fd,buf,1024,server.repl_syncio_timeout*1000) == -1) {
+ if (connSyncReadLine(conn,buf,1024,server.repl_syncio_timeout*1000) == -1) {
serverLog(LL_WARNING,
"I/O error reading bulk count from MASTER: %s",
strerror(errno));
@@ -1282,7 +1423,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf);
}
- nread = read(fd,buf,readlen);
+ nread = connRead(conn,buf,readlen);
if (nread <= 0) {
serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s",
(nread == -1) ? strerror(errno) : "connection lost");
@@ -1390,17 +1531,17 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
* handler, otherwise it will get called recursively since
* rdbLoad() will call the event loop to process events from time to
* time for non blocking loading. */
- aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
+ connSetReadHandler(conn, NULL);
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
if (use_diskless_load) {
rio rdb;
- rioInitWithFd(&rdb,fd,server.repl_transfer_size);
+ rioInitWithConn(&rdb,conn,server.repl_transfer_size);
/* Put the socket in blocking mode to simplify RDB transfer.
* We'll restore it when the RDB is received. */
- anetBlock(NULL,fd);
- anetRecvTimeout(NULL,fd,server.repl_timeout*1000);
+ connBlock(conn);
+ connRecvTimeout(conn, server.repl_timeout*1000);
startLoading(server.repl_transfer_size);
if (rdbLoadRio(&rdb,&rsi,0) != C_OK) {
@@ -1410,7 +1551,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
"Failed trying to load the MASTER synchronization DB "
"from socket");
cancelReplicationHandshake();
- rioFreeFd(&rdb, NULL);
+ rioFreeConn(&rdb, NULL);
if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) {
/* Restore the backed up databases. */
disklessLoadRestoreBackups(diskless_load_backup,1,
@@ -1443,16 +1584,16 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
{
serverLog(LL_WARNING,"Replication stream EOF marker is broken");
cancelReplicationHandshake();
- rioFreeFd(&rdb, NULL);
+ rioFreeConn(&rdb, NULL);
return;
}
}
/* Cleanup and restore the socket to the original state to continue
* with the normal replication. */
- rioFreeFd(&rdb, NULL);
- anetNonBlock(NULL,fd);
- anetRecvTimeout(NULL,fd,0);
+ rioFreeConn(&rdb, NULL);
+ connNonBlock(conn);
+ connRecvTimeout(conn,0);
} else {
/* Ensure background save doesn't overwrite synced data */
if (server.rdb_child_pid != -1) {
@@ -1529,7 +1670,7 @@ error:
#define SYNC_CMD_READ (1<<0)
#define SYNC_CMD_WRITE (1<<1)
#define SYNC_CMD_FULL (SYNC_CMD_READ|SYNC_CMD_WRITE)
-char *sendSynchronousCommand(int flags, int fd, ...) {
+char *sendSynchronousCommand(int flags, connection *conn, ...) {
/* Create the command to send to the master, we use redis binary
* protocol to make sure correct arguments are sent. This function
@@ -1540,7 +1681,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
sds cmd = sdsempty();
sds cmdargs = sdsempty();
size_t argslen = 0;
- va_start(ap,fd);
+ va_start(ap,conn);
while(1) {
arg = va_arg(ap, char*);
@@ -1557,12 +1698,12 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
sdsfree(cmdargs);
/* Transfer command to the server. */
- if (syncWrite(fd,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
+ if (connSyncWrite(conn,cmd,sdslen(cmd),server.repl_syncio_timeout*1000)
== -1)
{
sdsfree(cmd);
return sdscatprintf(sdsempty(),"-Writing to master: %s",
- strerror(errno));
+ connGetLastError(conn));
}
sdsfree(cmd);
}
@@ -1571,7 +1712,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
if (flags & SYNC_CMD_READ) {
char buf[256];
- if (syncReadLine(fd,buf,sizeof(buf),server.repl_syncio_timeout*1000)
+ if (connSyncReadLine(conn,buf,sizeof(buf),server.repl_syncio_timeout*1000)
== -1)
{
return sdscatprintf(sdsempty(),"-Reading from master: %s",
@@ -1637,7 +1778,7 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
#define PSYNC_FULLRESYNC 3
#define PSYNC_NOT_SUPPORTED 4
#define PSYNC_TRY_LATER 5
-int slaveTryPartialResynchronization(int fd, int read_reply) {
+int slaveTryPartialResynchronization(connection *conn, int read_reply) {
char *psync_replid;
char psync_offset[32];
sds reply;
@@ -1662,18 +1803,18 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
}
/* Issue the PSYNC command */
- reply = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PSYNC",psync_replid,psync_offset,NULL);
+ reply = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PSYNC",psync_replid,psync_offset,NULL);
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
- aeDeleteFileEvent(server.el,fd,AE_READABLE);
+ connSetReadHandler(conn, NULL);
return PSYNC_WRITE_ERROR;
}
return PSYNC_WAIT_REPLY;
}
/* Reading half */
- reply = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ reply = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
if (sdslen(reply) == 0) {
/* The master may send empty newlines after it receives PSYNC
* and before to reply, just to keep the connection alive. */
@@ -1681,7 +1822,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
return PSYNC_WAIT_REPLY;
}
- aeDeleteFileEvent(server.el,fd,AE_READABLE);
+ connSetReadHandler(conn, NULL);
if (!strncmp(reply,"+FULLRESYNC",11)) {
char *replid = NULL, *offset = NULL;
@@ -1755,7 +1896,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
/* Setup the replication to continue. */
sdsfree(reply);
- replicationResurrectCachedMaster(fd);
+ replicationResurrectCachedMaster(conn);
/* If this instance was restarted and we read the metadata to
* PSYNC from the persistence file, our replication backlog could
@@ -1797,29 +1938,23 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
/* This handler fires when the non blocking connect was able to
* establish a connection with the master. */
-void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
+void syncWithMaster(connection *conn) {
char tmpfile[256], *err = NULL;
int dfd = -1, maxtries = 5;
- int sockerr = 0, psync_result;
- socklen_t errlen = sizeof(sockerr);
- UNUSED(el);
- UNUSED(privdata);
- UNUSED(mask);
+ int psync_result;
/* If this event fired after the user turned the instance into a master
* with SLAVEOF NO ONE we must just return ASAP. */
if (server.repl_state == REPL_STATE_NONE) {
- close(fd);
+ connClose(conn);
return;
}
/* Check for errors in the socket: after a non blocking connect() we
* may find that the socket is in error state. */
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1)
- sockerr = errno;
- if (sockerr) {
+ if (connGetState(conn) != CONN_STATE_CONNECTED) {
serverLog(LL_WARNING,"Error condition on socket for SYNC: %s",
- strerror(sockerr));
+ connGetLastError(conn));
goto error;
}
@@ -1828,18 +1963,19 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
- aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
+ connSetReadHandler(conn, syncWithMaster);
+ connSetWriteHandler(conn, NULL);
server.repl_state = REPL_STATE_RECEIVE_PONG;
/* Send the PING, don't check for errors at all, we have the timeout
* that will take care about this. */
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"PING",NULL);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"PING",NULL);
if (err) goto write_error;
return;
}
/* Receive the PONG command. */
if (server.repl_state == REPL_STATE_RECEIVE_PONG) {
- err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
/* We accept only two replies as valid, a positive +PONG reply
* (we just check for "+") or an authentication error.
@@ -1864,13 +2000,13 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* AUTH with the master if required. */
if (server.repl_state == REPL_STATE_SEND_AUTH) {
if (server.masteruser && server.masterauth) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",
server.masteruser,server.masterauth,NULL);
if (err) goto write_error;
server.repl_state = REPL_STATE_RECEIVE_AUTH;
return;
} else if (server.masterauth) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",server.masterauth,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"AUTH",server.masterauth,NULL);
if (err) goto write_error;
server.repl_state = REPL_STATE_RECEIVE_AUTH;
return;
@@ -1881,7 +2017,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Receive AUTH reply. */
if (server.repl_state == REPL_STATE_RECEIVE_AUTH) {
- err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
if (err[0] == '-') {
serverLog(LL_WARNING,"Unable to AUTH to MASTER: %s",err);
sdsfree(err);
@@ -1894,11 +2030,14 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Set the slave port, so that Master's INFO command can list the
* slave listening port correctly. */
if (server.repl_state == REPL_STATE_SEND_PORT) {
- sds port = sdsfromlonglong(server.slave_announce_port ?
- server.slave_announce_port : server.port);
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
- "listening-port",port, NULL);
- sdsfree(port);
+ int port;
+ if (server.slave_announce_port) port = server.slave_announce_port;
+ else if (server.tls_replication && server.tls_port) port = server.tls_port;
+ else port = server.port;
+ sds portstr = sdsfromlonglong(port);
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
+ "listening-port",portstr, NULL);
+ sdsfree(portstr);
if (err) goto write_error;
sdsfree(err);
server.repl_state = REPL_STATE_RECEIVE_PORT;
@@ -1907,7 +2046,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Receive REPLCONF listening-port reply. */
if (server.repl_state == REPL_STATE_RECEIVE_PORT) {
- err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF listening-port. */
if (err[0] == '-') {
@@ -1928,7 +2067,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Set the slave ip, so that Master's INFO command can list the
* slave IP address port correctly in case of port forwarding or NAT. */
if (server.repl_state == REPL_STATE_SEND_IP) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
"ip-address",server.slave_announce_ip, NULL);
if (err) goto write_error;
sdsfree(err);
@@ -1938,7 +2077,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Receive REPLCONF ip-address reply. */
if (server.repl_state == REPL_STATE_RECEIVE_IP) {
- err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF ip-address. */
if (err[0] == '-') {
@@ -1956,7 +2095,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
*
* The master will ignore capabilities it does not understand. */
if (server.repl_state == REPL_STATE_SEND_CAPA) {
- err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"REPLCONF",
+ err = sendSynchronousCommand(SYNC_CMD_WRITE,conn,"REPLCONF",
"capa","eof","capa","psync2",NULL);
if (err) goto write_error;
sdsfree(err);
@@ -1966,7 +2105,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* Receive CAPA reply. */
if (server.repl_state == REPL_STATE_RECEIVE_CAPA) {
- err = sendSynchronousCommand(SYNC_CMD_READ,fd,NULL);
+ err = sendSynchronousCommand(SYNC_CMD_READ,conn,NULL);
/* Ignore the error if any, not all the Redis versions support
* REPLCONF capa. */
if (err[0] == '-') {
@@ -1983,7 +2122,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
if (server.repl_state == REPL_STATE_SEND_PSYNC) {
- if (slaveTryPartialResynchronization(fd,0) == PSYNC_WRITE_ERROR) {
+ if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
err = sdsnew("Write error sending the PSYNC command.");
goto write_error;
}
@@ -1999,7 +2138,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
goto error;
}
- psync_result = slaveTryPartialResynchronization(fd,1);
+ psync_result = slaveTryPartialResynchronization(conn,1);
if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
/* If the master is in a transient error, we should try to PSYNC
@@ -2028,7 +2167,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
* already populated. */
if (psync_result == PSYNC_NOT_SUPPORTED) {
serverLog(LL_NOTICE,"Retrying with SYNC...");
- if (syncWrite(fd,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
+ if (connSyncWrite(conn,"SYNC\r\n",6,server.repl_syncio_timeout*1000) == -1) {
serverLog(LL_WARNING,"I/O error writing to MASTER: %s",
strerror(errno));
goto error;
@@ -2053,12 +2192,13 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
}
/* Setup the non blocking download of the bulk file. */
- if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
- == AE_ERR)
+ if (connSetReadHandler(conn, readSyncBulkPayload)
+ == C_ERR)
{
+ char conninfo[CONN_INFO_LEN];
serverLog(LL_WARNING,
- "Can't create readable event for SYNC: %s (fd=%d)",
- strerror(errno),fd);
+ "Can't create readable event for SYNC: %s (%s)",
+ strerror(errno), connGetInfo(conn, conninfo, sizeof(conninfo)));
goto error;
}
@@ -2070,16 +2210,15 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
error:
- aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
if (dfd != -1) close(dfd);
- close(fd);
+ connClose(conn);
+ server.repl_transfer_s = NULL;
if (server.repl_transfer_fd != -1)
close(server.repl_transfer_fd);
if (server.repl_transfer_tmpfile)
zfree(server.repl_transfer_tmpfile);
server.repl_transfer_tmpfile = NULL;
server.repl_transfer_fd = -1;
- server.repl_transfer_s = -1;
server.repl_state = REPL_STATE_CONNECT;
return;
@@ -2090,26 +2229,18 @@ write_error: /* Handle sendSynchronousCommand(SYNC_CMD_WRITE) errors. */
}
int connectWithMaster(void) {
- int fd;
-
- fd = anetTcpNonBlockBestEffortBindConnect(NULL,
- server.masterhost,server.masterport,NET_FIRST_BIND_ADDR);
- if (fd == -1) {
+ server.repl_transfer_s = server.tls_replication ? connCreateTLS() : connCreateSocket();
+ if (connConnect(server.repl_transfer_s, server.masterhost, server.masterport,
+ NET_FIRST_BIND_ADDR, syncWithMaster) == C_ERR) {
serverLog(LL_WARNING,"Unable to connect to MASTER: %s",
- strerror(errno));
+ connGetLastError(server.repl_transfer_s));
+ connClose(server.repl_transfer_s);
+ server.repl_transfer_s = NULL;
return C_ERR;
}
- if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
- AE_ERR)
- {
- close(fd);
- serverLog(LL_WARNING,"Can't create readable event for SYNC");
- return C_ERR;
- }
server.repl_transfer_lastio = server.unixtime;
- server.repl_transfer_s = fd;
server.repl_state = REPL_STATE_CONNECTING;
return C_OK;
}
@@ -2119,11 +2250,8 @@ int connectWithMaster(void) {
* Never call this function directly, use cancelReplicationHandshake() instead.
*/
void undoConnectWithMaster(void) {
- int fd = server.repl_transfer_s;
-
- aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
- close(fd);
- server.repl_transfer_s = -1;
+ connClose(server.repl_transfer_s);
+ server.repl_transfer_s = NULL;
}
/* Abort the async download of the bulk dataset while SYNC-ing with master.
@@ -2311,7 +2439,7 @@ void roleCommand(client *c) {
char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
if (slaveip[0] == '\0') {
- if (anetPeerToString(slave->fd,ip,sizeof(ip),NULL) == -1)
+ if (connPeerToString(slave->conn,ip,sizeof(ip),NULL) == -1)
continue;
slaveip = ip;
}
@@ -2433,7 +2561,7 @@ void replicationCacheMasterUsingMyself(void) {
/* The master client we create can be set to any DBID, because
* the new master will start its replication stream with SELECT. */
server.master_initial_offset = server.master_repl_offset;
- replicationCreateMasterClient(-1,-1);
+ replicationCreateMasterClient(NULL,-1);
/* Use our own ID / offset. */
memcpy(server.master->replid, server.replid, sizeof(server.replid));
@@ -2462,10 +2590,11 @@ void replicationDiscardCachedMaster(void) {
* This function is called after successfully setting up a partial
* resynchronization, so the stream of data that we'll receive will start
* from where this master left off. */
-void replicationResurrectCachedMaster(int newfd) {
+void replicationResurrectCachedMaster(connection *conn) {
server.master = server.cached_master;
server.cached_master = NULL;
- server.master->fd = newfd;
+ server.master->conn = conn;
+ connSetPrivateData(server.master->conn, server.master);
server.master->flags &= ~(CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP);
server.master->authenticated = 1;
server.master->lastinteraction = server.unixtime;
@@ -2474,8 +2603,7 @@ void replicationResurrectCachedMaster(int newfd) {
/* Re-add to the list of clients. */
linkClient(server.master);
- if (aeCreateFileEvent(server.el, newfd, AE_READABLE,
- readQueryFromClient, server.master)) {
+ if (connSetReadHandler(server.master->conn, readQueryFromClient)) {
serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
@@ -2483,8 +2611,7 @@ void replicationResurrectCachedMaster(int newfd) {
/* We may also need to install the write handler as well if there is
* pending data in the write buffers. */
if (clientHasPendingReplies(server.master)) {
- if (aeCreateFileEvent(server.el, newfd, AE_WRITABLE,
- sendReplyToClient, server.master)) {
+ if (connSetWriteHandler(server.master->conn, sendReplyToClient)) {
serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
}
@@ -2854,9 +2981,7 @@ void replicationCron(void) {
server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));
if (is_presync) {
- if (write(slave->fd, "\n", 1) == -1) {
- /* Don't worry about socket errors, it's just a ping. */
- }
+ connWrite(slave->conn, "\n", 1);
}
}
diff --git a/src/rio.c b/src/rio.c
index bdbc5d0e9..c8c924380 100644
--- a/src/rio.c
+++ b/src/rio.c
@@ -159,13 +159,13 @@ void rioInitWithFile(rio *r, FILE *fp) {
r->io.file.autosync = 0;
}
-/* ------------------- File descriptor implementation -------------------
+/* ------------------- Connection implementation -------------------
* We use this RIO implementation when reading an RDB file directly from
- * the socket to the memory via rdbLoadRio(), thus this implementation
- * only implements reading from a file descriptor that is, normally,
+ * the connection to the memory via rdbLoadRio(), thus this implementation
+ * only implements reading from a connection that is, normally,
* just a socket. */
-static size_t rioFdWrite(rio *r, const void *buf, size_t len) {
+static size_t rioConnWrite(rio *r, const void *buf, size_t len) {
UNUSED(r);
UNUSED(buf);
UNUSED(len);
@@ -173,72 +173,72 @@ static size_t rioFdWrite(rio *r, const void *buf, size_t len) {
}
/* Returns 1 or 0 for success/failure. */
-static size_t rioFdRead(rio *r, void *buf, size_t len) {
- size_t avail = sdslen(r->io.fd.buf)-r->io.fd.pos;
+static size_t rioConnRead(rio *r, void *buf, size_t len) {
+ size_t avail = sdslen(r->io.conn.buf)-r->io.conn.pos;
/* If the buffer is too small for the entire request: realloc. */
- if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len)
- r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf));
+ if (sdslen(r->io.conn.buf) + sdsavail(r->io.conn.buf) < len)
+ r->io.conn.buf = sdsMakeRoomFor(r->io.conn.buf, len - sdslen(r->io.conn.buf));
/* If the remaining unused buffer is not large enough: memmove so that we
* can read the rest. */
- if (len > avail && sdsavail(r->io.fd.buf) < len - avail) {
- sdsrange(r->io.fd.buf, r->io.fd.pos, -1);
- r->io.fd.pos = 0;
+ if (len > avail && sdsavail(r->io.conn.buf) < len - avail) {
+ sdsrange(r->io.conn.buf, r->io.conn.pos, -1);
+ r->io.conn.pos = 0;
}
/* If we don't already have all the data in the sds, read more */
- while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) {
- size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos;
+ while (len > sdslen(r->io.conn.buf) - r->io.conn.pos) {
+ size_t buffered = sdslen(r->io.conn.buf) - r->io.conn.pos;
size_t toread = len - buffered;
/* Read either what's missing, or PROTO_IOBUF_LEN, the bigger of
* the two. */
if (toread < PROTO_IOBUF_LEN) toread = PROTO_IOBUF_LEN;
- if (toread > sdsavail(r->io.fd.buf)) toread = sdsavail(r->io.fd.buf);
- if (r->io.fd.read_limit != 0 &&
- r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit)
+ if (toread > sdsavail(r->io.conn.buf)) toread = sdsavail(r->io.conn.buf);
+ if (r->io.conn.read_limit != 0 &&
+ r->io.conn.read_so_far + buffered + toread > r->io.conn.read_limit)
{
- if (r->io.fd.read_limit >= r->io.fd.read_so_far - buffered)
- toread = r->io.fd.read_limit - r->io.fd.read_so_far - buffered;
+ if (r->io.conn.read_limit >= r->io.conn.read_so_far - buffered)
+ toread = r->io.conn.read_limit - r->io.conn.read_so_far - buffered;
else {
errno = EOVERFLOW;
return 0;
}
}
- int retval = read(r->io.fd.fd,
- (char*)r->io.fd.buf + sdslen(r->io.fd.buf),
+ int retval = connRead(r->io.conn.conn,
+ (char*)r->io.conn.buf + sdslen(r->io.conn.buf),
toread);
if (retval <= 0) {
if (errno == EWOULDBLOCK) errno = ETIMEDOUT;
return 0;
}
- sdsIncrLen(r->io.fd.buf, retval);
+ sdsIncrLen(r->io.conn.buf, retval);
}
- memcpy(buf, (char*)r->io.fd.buf + r->io.fd.pos, len);
- r->io.fd.read_so_far += len;
- r->io.fd.pos += len;
+ memcpy(buf, (char*)r->io.conn.buf + r->io.conn.pos, len);
+ r->io.conn.read_so_far += len;
+ r->io.conn.pos += len;
return len;
}
/* Returns read/write position in file. */
-static off_t rioFdTell(rio *r) {
- return r->io.fd.read_so_far;
+static off_t rioConnTell(rio *r) {
+ return r->io.conn.read_so_far;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
-static int rioFdFlush(rio *r) {
+static int rioConnFlush(rio *r) {
/* Our flush is implemented by the write method, that recognizes a
* buffer set to NULL with a count of zero as a flush request. */
- return rioFdWrite(r,NULL,0);
+ return rioConnWrite(r,NULL,0);
}
-static const rio rioFdIO = {
- rioFdRead,
- rioFdWrite,
- rioFdTell,
- rioFdFlush,
+static const rio rioConnIO = {
+ rioConnRead,
+ rioConnWrite,
+ rioConnTell,
+ rioConnFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* flags */
@@ -249,108 +249,90 @@ static const rio rioFdIO = {
/* Create an RIO that implements a buffered read from a connection.
* The read_limit argument stops buffering when reaching the limit. */
-void rioInitWithFd(rio *r, int fd, size_t read_limit) {
- *r = rioFdIO;
- r->io.fd.fd = fd;
- r->io.fd.pos = 0;
- r->io.fd.read_limit = read_limit;
- r->io.fd.read_so_far = 0;
- r->io.fd.buf = sdsnewlen(NULL, PROTO_IOBUF_LEN);
- sdsclear(r->io.fd.buf);
+void rioInitWithConn(rio *r, connection *conn, size_t read_limit) {
+ *r = rioConnIO;
+ r->io.conn.conn = conn;
+ r->io.conn.pos = 0;
+ r->io.conn.read_limit = read_limit;
+ r->io.conn.read_so_far = 0;
+ r->io.conn.buf = sdsnewlen(NULL, PROTO_IOBUF_LEN);
+ sdsclear(r->io.conn.buf);
}
/* Release the RIO stream. Optionally returns the unread buffered data
* when the SDS pointer 'remaining' is passed. */
-void rioFreeFd(rio *r, sds *remaining) {
- if (remaining && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) {
- if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1);
- *remaining = r->io.fd.buf;
+void rioFreeConn(rio *r, sds *remaining) {
+ if (remaining && (size_t)r->io.conn.pos < sdslen(r->io.conn.buf)) {
+ if (r->io.conn.pos > 0) sdsrange(r->io.conn.buf, r->io.conn.pos, -1);
+ *remaining = r->io.conn.buf;
} else {
- sdsfree(r->io.fd.buf);
+ sdsfree(r->io.conn.buf);
if (remaining) *remaining = NULL;
}
- r->io.fd.buf = NULL;
+ r->io.conn.buf = NULL;
}
-/* ------------------- File descriptors set implementation ------------------
- * This target is used to write the RDB file to N different replicas via
- * sockets, when the master just streams the data to the replicas without
- * creating an RDB on-disk image (diskless replication option).
+/* ------------------- File descriptor implementation ------------------
+ * This target is used to write the RDB file to a pipe, when the master just
+ * streams the data to the replicas without creating an RDB on-disk image
+ * (diskless replication option).
* It only implements writes. */
/* Returns 1 or 0 for success/failure.
- * The function returns success as long as we are able to correctly write
- * to at least one file descriptor.
*
* When buf is NULL and len is 0, the function performs a flush operation
* if there is some pending buffer, so this function is also used in order
- * to implement rioFdsetFlush(). */
-static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
+ * to implement rioFdFlush(). */
+static size_t rioFdWrite(rio *r, const void *buf, size_t len) {
ssize_t retval;
- int j;
unsigned char *p = (unsigned char*) buf;
int doflush = (buf == NULL && len == 0);
- /* To start we always append to our buffer. If it gets larger than
- * a given size, we actually write to the sockets. */
- if (len) {
- r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
- len = 0; /* Prevent entering the while below if we don't flush. */
- if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1;
- }
-
- if (doflush) {
- p = (unsigned char*) r->io.fdset.buf;
- len = sdslen(r->io.fdset.buf);
+ /* For small writes, we'd rather keep the data in a user-space buffer and
+ * flush it only when it grows. However, for larger writes, we prefer to
+ * flush any pre-existing buffer and write the new data directly, without
+ * reallocs and memory copying. */
+ if (len > PROTO_IOBUF_LEN) {
+ /* First, flush any pre-existing buffered data. */
+ if (sdslen(r->io.fd.buf)) {
+ if (rioFdWrite(r, NULL, 0) == 0)
+ return 0;
+ }
+ /* Write the new data, keeping 'p' and 'len' from the input. */
+ } else {
+ if (len) {
+ r->io.fd.buf = sdscatlen(r->io.fd.buf,buf,len);
+ if (sdslen(r->io.fd.buf) > PROTO_IOBUF_LEN)
+ doflush = 1;
+ if (!doflush)
+ return 1;
+ }
+ /* Flushing the buffered data: set 'p' and 'len' accordingly. */
+ p = (unsigned char*) r->io.fd.buf;
+ len = sdslen(r->io.fd.buf);
}
- /* Write in little chunchs so that when there are big writes we
- * parallelize while the kernel is sending data in background to
- * the TCP socket. */
- while(len) {
- size_t count = len < 1024 ? len : 1024;
- int broken = 0;
- for (j = 0; j < r->io.fdset.numfds; j++) {
- if (r->io.fdset.state[j] != 0) {
- /* Skip FDs alraedy in error. */
- broken++;
- continue;
- }
-
- /* Make sure to write 'count' bytes to the socket regardless
- * of short writes. */
- size_t nwritten = 0;
- while(nwritten != count) {
- retval = write(r->io.fdset.fds[j],p+nwritten,count-nwritten);
- if (retval <= 0) {
- /* With blocking sockets, which is the sole user of this
- * rio target, EWOULDBLOCK is returned only because of
- * the SO_SNDTIMEO socket option, so we translate the error
- * into one more recognizable by the user. */
- if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
- break;
- }
- nwritten += retval;
- }
-
- if (nwritten != count) {
- /* Mark this FD as broken. */
- r->io.fdset.state[j] = errno;
- if (r->io.fdset.state[j] == 0) r->io.fdset.state[j] = EIO;
- }
+ size_t nwritten = 0;
+ while(nwritten != len) {
+ retval = write(r->io.fd.fd,p+nwritten,len-nwritten);
+ if (retval <= 0) {
+ /* With blocking io, which is the sole user of this
+ * rio target, EWOULDBLOCK is returned only because of
+ * the SO_SNDTIMEO socket option, so we translate the error
+ * into one more recognizable by the user. */
+ if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
+ return 0; /* error. */
}
- if (broken == r->io.fdset.numfds) return 0; /* All the FDs in error. */
- p += count;
- len -= count;
- r->io.fdset.pos += count;
+ nwritten += retval;
}
- if (doflush) sdsclear(r->io.fdset.buf);
+ r->io.fd.pos += len;
+ sdsclear(r->io.fd.buf);
return 1;
}
/* Returns 1 or 0 for success/failure. */
-static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
+static size_t rioFdRead(rio *r, void *buf, size_t len) {
UNUSED(r);
UNUSED(buf);
UNUSED(len);
@@ -358,23 +340,23 @@ static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
}
/* Returns read/write position in file. */
-static off_t rioFdsetTell(rio *r) {
- return r->io.fdset.pos;
+static off_t rioFdTell(rio *r) {
+ return r->io.fd.pos;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
-static int rioFdsetFlush(rio *r) {
+static int rioFdFlush(rio *r) {
/* Our flush is implemented by the write method, that recognizes a
* buffer set to NULL with a count of zero as a flush request. */
- return rioFdsetWrite(r,NULL,0);
+ return rioFdWrite(r,NULL,0);
}
-static const rio rioFdsetIO = {
- rioFdsetRead,
- rioFdsetWrite,
- rioFdsetTell,
- rioFdsetFlush,
+static const rio rioFdIO = {
+ rioFdRead,
+ rioFdWrite,
+ rioFdTell,
+ rioFdFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* flags */
@@ -383,24 +365,16 @@ static const rio rioFdsetIO = {
{ { NULL, 0 } } /* union for io-specific vars */
};
-void rioInitWithFdset(rio *r, int *fds, int numfds) {
- int j;
-
- *r = rioFdsetIO;
- r->io.fdset.fds = zmalloc(sizeof(int)*numfds);
- r->io.fdset.state = zmalloc(sizeof(int)*numfds);
- memcpy(r->io.fdset.fds,fds,sizeof(int)*numfds);
- for (j = 0; j < numfds; j++) r->io.fdset.state[j] = 0;
- r->io.fdset.numfds = numfds;
- r->io.fdset.pos = 0;
- r->io.fdset.buf = sdsempty();
+void rioInitWithFd(rio *r, int fd) {
+ *r = rioFdIO;
+ r->io.fd.fd = fd;
+ r->io.fd.pos = 0;
+ r->io.fd.buf = sdsempty();
}
/* release the rio stream. */
-void rioFreeFdset(rio *r) {
- zfree(r->io.fdset.fds);
- zfree(r->io.fdset.state);
- sdsfree(r->io.fdset.buf);
+void rioFreeFd(rio *r) {
+ sdsfree(r->io.fd.buf);
}
/* ---------------------------- Generic functions ---------------------------- */
diff --git a/src/rio.h b/src/rio.h
index eb7a05748..9576335e8 100644
--- a/src/rio.h
+++ b/src/rio.h
@@ -35,6 +35,7 @@
#include <stdio.h>
#include <stdint.h>
#include "sds.h"
+#include "connection.h"
#define RIO_FLAG_READ_ERROR (1<<0)
#define RIO_FLAG_WRITE_ERROR (1<<1)
@@ -76,22 +77,20 @@ struct _rio {
off_t buffered; /* Bytes written since last fsync. */
off_t autosync; /* fsync after 'autosync' bytes written. */
} file;
- /* file descriptor */
+ /* Connection object (used to read from socket) */
struct {
- int fd; /* File descriptor. */
+ connection *conn; /* Connection */
off_t pos; /* pos in buf that was returned */
sds buf; /* buffered data */
size_t read_limit; /* don't allow to buffer/read more than that */
size_t read_so_far; /* amount of data read from the rio (not buffered) */
- } fd;
- /* Multiple FDs target (used to write to N sockets). */
+ } conn;
+ /* FD target (used to write to pipe). */
struct {
- int *fds; /* File descriptors. */
- int *state; /* Error state of each fd. 0 (if ok) or errno. */
- int numfds;
+ int fd; /* File descriptor. */
off_t pos;
sds buf;
- } fdset;
+ } fd;
} io;
};
@@ -159,11 +158,11 @@ static inline void rioClearErrors(rio *r) {
void rioInitWithFile(rio *r, FILE *fp);
void rioInitWithBuffer(rio *r, sds s);
-void rioInitWithFd(rio *r, int fd, size_t read_limit);
-void rioInitWithFdset(rio *r, int *fds, int numfds);
+void rioInitWithConn(rio *r, connection *conn, size_t read_limit);
+void rioInitWithFd(rio *r, int fd);
-void rioFreeFdset(rio *r);
-void rioFreeFd(rio *r, sds* out_remainingBufferedData);
+void rioFreeFd(rio *r);
+void rioFreeConn(rio *r, sds* out_remainingBufferedData);
size_t rioWriteBulkCount(rio *r, char prefix, long count);
size_t rioWriteBulkString(rio *r, const char *buf, size_t len);
diff --git a/src/scripting.c b/src/scripting.c
index ec95eb256..7cf21f408 100644
--- a/src/scripting.c
+++ b/src/scripting.c
@@ -61,7 +61,7 @@ sds ldbCatStackValue(sds s, lua_State *lua, int idx);
#define LDB_BREAKPOINTS_MAX 64 /* Max number of breakpoints. */
#define LDB_MAX_LEN_DEFAULT 256 /* Default len limit for replies / var dumps. */
struct ldbState {
- int fd; /* Socket of the debugging client. */
+ connection *conn; /* Connection of the debugging client. */
int active; /* Are we debugging EVAL right now? */
int forked; /* Is this a fork()ed debugging session? */
list *logs; /* List of messages to send to the client. */
@@ -1243,7 +1243,7 @@ void scriptingInit(int setup) {
* Note: there is no need to create it again when this function is called
* by scriptingReset(). */
if (server.lua_client == NULL) {
- server.lua_client = createClient(-1);
+ server.lua_client = createClient(NULL);
server.lua_client->flags |= CLIENT_LUA;
}
@@ -1734,7 +1734,7 @@ NULL
/* Initialize Lua debugger data structures. */
void ldbInit(void) {
- ldb.fd = -1;
+ ldb.conn = NULL;
ldb.active = 0;
ldb.logs = listCreate();
listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree);
@@ -1756,7 +1756,7 @@ void ldbFlushLog(list *log) {
void ldbEnable(client *c) {
c->flags |= CLIENT_LUA_DEBUG;
ldbFlushLog(ldb.logs);
- ldb.fd = c->fd;
+ ldb.conn = c->conn;
ldb.step = 1;
ldb.bpcount = 0;
ldb.luabp = 0;
@@ -1811,7 +1811,7 @@ void ldbSendLogs(void) {
proto = sdscatlen(proto,"\r\n",2);
listDelNode(ldb.logs,ln);
}
- if (write(ldb.fd,proto,sdslen(proto)) == -1) {
+ if (connWrite(ldb.conn,proto,sdslen(proto)) == -1) {
/* Avoid warning. We don't check the return value of write()
* since the next read() will catch the I/O error and will
* close the debugging session. */
@@ -1863,8 +1863,8 @@ int ldbStartSession(client *c) {
}
/* Setup our debugging session. */
- anetBlock(NULL,ldb.fd);
- anetSendTimeout(NULL,ldb.fd,5000);
+ connBlock(ldb.conn);
+ connSendTimeout(ldb.conn,5000);
ldb.active = 1;
/* First argument of EVAL is the script itself. We split it into different
@@ -1891,7 +1891,7 @@ void ldbEndSession(client *c) {
/* If it's a fork()ed session, we just exit. */
if (ldb.forked) {
- writeToClient(c->fd, c, 0);
+ writeToClient(c,0);
serverLog(LL_WARNING,"Lua debugging session child exiting");
exitFromChild(0);
} else {
@@ -1900,8 +1900,8 @@ void ldbEndSession(client *c) {
}
/* Otherwise let's restore client's state. */
- anetNonBlock(NULL,ldb.fd);
- anetSendTimeout(NULL,ldb.fd,0);
+ connNonBlock(ldb.conn);
+ connSendTimeout(ldb.conn,0);
/* Close the client connectin after sending the final EVAL reply
* in order to signal the end of the debugging session. */
@@ -2538,7 +2538,7 @@ int ldbRepl(lua_State *lua) {
while(1) {
while((argv = ldbReplParseCommand(&argc)) == NULL) {
char buf[1024];
- int nread = read(ldb.fd,buf,sizeof(buf));
+ int nread = connRead(ldb.conn,buf,sizeof(buf));
if (nread <= 0) {
/* Make sure the script runs without user input since the
* client is no longer connected. */
diff --git a/src/sentinel.c b/src/sentinel.c
index 92ea75436..0490db4e9 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -30,6 +30,10 @@
#include "server.h"
#include "hiredis.h"
+#ifdef USE_OPENSSL
+#include "openssl/ssl.h"
+#include "hiredis_ssl.h"
+#endif
#include "async.h"
#include <ctype.h>
@@ -40,6 +44,10 @@
extern char **environ;
+#ifdef USE_OPENSSL
+extern SSL_CTX *redis_tls_ctx;
+#endif
+
#define REDIS_SENTINEL_PORT 26379
/* ======================== Sentinel global state =========================== */
@@ -1995,6 +2003,19 @@ void sentinelSetClientName(sentinelRedisInstance *ri, redisAsyncContext *c, char
}
}
+/* Attach a TLS session, created from the global redis_tls_ctx, to a hiredis
+ * async context used for a Sentinel instance link.
+ *
+ * Returns C_OK on success and C_ERR if no TLS context is configured, the SSL
+ * object cannot be allocated, or hiredis refuses to initiate TLS. When Redis
+ * is built without USE_OPENSSL this is a no-op that always returns C_OK. */
+static int instanceLinkNegotiateTLS(redisAsyncContext *context) {
+#ifdef USE_OPENSSL
+    SSL *session;
+
+    if (redis_tls_ctx == NULL) return C_ERR;
+
+    session = SSL_new(redis_tls_ctx);
+    if (session == NULL) return C_ERR;
+
+    /* NOTE(review): who owns 'session' when redisInitiateSSL() fails depends
+     * on the bundled hiredis version -- confirm it is released there. */
+    if (redisInitiateSSL(&context->c, session) == REDIS_ERR) return C_ERR;
+#else
+    (void) context;
+#endif
+    return C_OK;
+}
+
/* Create the async connections for the instance link if the link
* is disconnected. Note that link->disconnected is true even if just
* one of the two links (commands and pub/sub) is missing. */
@@ -2010,7 +2031,11 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
/* Commands connection. */
if (link->cc == NULL) {
link->cc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
- if (link->cc->err) {
+ if (!link->cc->err && server.tls_replication &&
+ (instanceLinkNegotiateTLS(link->cc) == C_ERR)) {
+ sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #Failed to initialize TLS");
+ instanceLinkCloseConnection(link,link->cc);
+ } else if (link->cc->err) {
sentinelEvent(LL_DEBUG,"-cmd-link-reconnection",ri,"%@ #%s",
link->cc->errstr);
instanceLinkCloseConnection(link,link->cc);
@@ -2033,7 +2058,10 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
/* Pub / Sub */
if ((ri->flags & (SRI_MASTER|SRI_SLAVE)) && link->pc == NULL) {
link->pc = redisAsyncConnectBind(ri->addr->ip,ri->addr->port,NET_FIRST_BIND_ADDR);
- if (link->pc->err) {
+ if (!link->pc->err && server.tls_replication &&
+ (instanceLinkNegotiateTLS(link->pc) == C_ERR)) {
+ sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #Failed to initialize TLS");
+ } else if (link->pc->err) {
sentinelEvent(LL_DEBUG,"-pubsub-link-reconnection",ri,"%@ #%s",
link->pc->errstr);
instanceLinkCloseConnection(link,link->pc);
@@ -2584,8 +2612,9 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
return C_ERR;
announce_ip = ip;
}
- announce_port = sentinel.announce_port ?
- sentinel.announce_port : server.port;
+ if (sentinel.announce_port) announce_port = sentinel.announce_port;
+ else if (server.tls_replication && server.tls_port) announce_port = server.tls_port;
+ else announce_port = server.port;
/* Format and send the Hello message. */
snprintf(payload,sizeof(payload),
diff --git a/src/server.c b/src/server.c
index f67175651..d16ff0a8e 100644
--- a/src/server.c
+++ b/src/server.c
@@ -1752,6 +1752,62 @@ void updateCachedTime(void) {
server.daylight_active = tm.tm_isdst;
}
+/* Reap at most one terminated child with a non-blocking wait3() and run the
+ * completion handler matching its pid (RDB save, AOF rewrite, module fork or
+ * Lua debugger child). Returns without reaping anything while an RDB child
+ * exists and server.rdb_pipe_conns is still set. */
+void checkChildrenDone(void) {
+    /* Zero-initialized: the status macros below are evaluated before pid is
+     * compared against -1, and wait3() may leave statloc untouched when it
+     * fails. */
+    int statloc = 0;
+    pid_t pid;
+
+    /* If we have a diskless rdb child (note that we support only one concurrent
+     * child), we want to avoid collecting its exit status and acting on it
+     * as long as we didn't finish to drain the pipe, since then we're at risk
+     * of starting a new fork and a new pipe before we're done with the previous
+     * one. */
+    if (server.rdb_child_pid != -1 && server.rdb_pipe_conns)
+        return;
+
+    if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
+        int exitcode = WEXITSTATUS(statloc);
+        int bysignal = 0;
+
+        if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
+
+        /* sigKillChildHandler catches the signal and calls exit(), but we
+         * must make sure not to flag lastbgsave_status, etc incorrectly.
+         * We could directly terminate the child process via SIGUSR1
+         * without handling it, but in this case Valgrind will log an
+         * annoying error. */
+        if (exitcode == SERVER_CHILD_NOERROR_RETVAL) {
+            bysignal = SIGUSR1;
+            exitcode = 1;
+        }
+
+        if (pid == -1) {
+            serverLog(LL_WARNING,"wait3() returned an error: %s. "
+                "rdb_child_pid = %d, aof_child_pid = %d, module_child_pid = %d",
+                strerror(errno),
+                (int) server.rdb_child_pid,
+                (int) server.aof_child_pid,
+                (int) server.module_child_pid);
+        } else if (pid == server.rdb_child_pid) {
+            backgroundSaveDoneHandler(exitcode,bysignal);
+            if (!bysignal && exitcode == 0) receiveChildInfo();
+        } else if (pid == server.aof_child_pid) {
+            backgroundRewriteDoneHandler(exitcode,bysignal);
+            if (!bysignal && exitcode == 0) receiveChildInfo();
+        } else if (pid == server.module_child_pid) {
+            ModuleForkDoneHandler(exitcode,bysignal);
+            if (!bysignal && exitcode == 0) receiveChildInfo();
+        } else {
+            /* Not one of the tracked children: it may be a forked Lua
+             * debugging session, otherwise it is unexpected. */
+            if (!ldbRemoveChild(pid)) {
+                serverLog(LL_WARNING,
+                    "Warning, detected child with unmatched pid: %ld",
+                    (long)pid);
+            }
+        }
+        updateDictResizePolicy();
+        closeChildInfoPipe();
+    }
+}
+
/* This is our timer interrupt, called server.hz times per second.
* Here is where we do a number of things that need to be done asynchronously.
* For instance:
@@ -1903,51 +1959,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Check if a background saving or AOF rewrite in progress terminated. */
if (hasActiveChildProcess() || ldbPendingChildren())
{
- int statloc;
- pid_t pid;
-
- if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
- int exitcode = WEXITSTATUS(statloc);
- int bysignal = 0;
-
- if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
-
- /* sigKillChildHandler catches the signal and calls exit(), but we
- * must make sure not to flag lastbgsave_status, etc incorrectly.
- * We could directly terminate the child process via SIGUSR1
- * without handling it, but in this case Valgrind will log an
- * annoying error. */
- if (exitcode == SERVER_CHILD_NOERROR_RETVAL) {
- bysignal = SIGUSR1;
- exitcode = 1;
- }
-
- if (pid == -1) {
- serverLog(LL_WARNING,"wait3() returned an error: %s. "
- "rdb_child_pid = %d, aof_child_pid = %d, module_child_pid = %d",
- strerror(errno),
- (int) server.rdb_child_pid,
- (int) server.aof_child_pid,
- (int) server.module_child_pid);
- } else if (pid == server.rdb_child_pid) {
- backgroundSaveDoneHandler(exitcode,bysignal);
- if (!bysignal && exitcode == 0) receiveChildInfo();
- } else if (pid == server.aof_child_pid) {
- backgroundRewriteDoneHandler(exitcode,bysignal);
- if (!bysignal && exitcode == 0) receiveChildInfo();
- } else if (pid == server.module_child_pid) {
- ModuleForkDoneHandler(exitcode,bysignal);
- if (!bysignal && exitcode == 0) receiveChildInfo();
- } else {
- if (!ldbRemoveChild(pid)) {
- serverLog(LL_WARNING,
- "Warning, detected child with unmatched pid: %ld",
- (long)pid);
- }
- }
- updateDictResizePolicy();
- closeChildInfoPipe();
- }
+ checkChildrenDone();
} else {
/* If there is not a background saving/rewrite in progress check if
* we have to save/rewrite now. */
@@ -2054,6 +2066,11 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
void beforeSleep(struct aeEventLoop *eventLoop) {
UNUSED(eventLoop);
+ /* Handle TLS pending data. (must be done before flushAppendOnlyFile) */
+ tlsProcessPendingData();
+ /* If tls still has pending unread data don't sleep at all. */
+ aeSetDontWait(server.el, tlsHasPendingData());
+
/* Call the Redis Cluster before sleep function. Note that this function
* may change the state of Redis Cluster (from ok to fail or vice versa),
* so it's a good idea to call it before serving the unblocked clients
@@ -2247,11 +2264,13 @@ void initServerConfig(void) {
server.dynamic_hz = CONFIG_DEFAULT_DYNAMIC_HZ;
server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
server.port = CONFIG_DEFAULT_SERVER_PORT;
+ server.tls_port = CONFIG_DEFAULT_SERVER_TLS_PORT;
server.tcp_backlog = CONFIG_DEFAULT_TCP_BACKLOG;
server.bindaddr_count = 0;
server.unixsocket = NULL;
server.unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM;
server.ipfd_count = 0;
+ server.tlsfd_count = 0;
server.sofd = -1;
server.protected_mode = CONFIG_DEFAULT_PROTECTED_MODE;
server.gopher_enabled = CONFIG_DEFAULT_GOPHER_ENABLED;
@@ -2286,6 +2305,7 @@ void initServerConfig(void) {
server.aof_rewrite_min_size = AOF_REWRITE_MIN_SIZE;
server.aof_rewrite_base_size = 0;
server.aof_rewrite_scheduled = 0;
+ server.aof_flush_sleep = 0;
server.aof_last_fsync = time(NULL);
server.aof_rewrite_time_last = -1;
server.aof_rewrite_time_start = -1;
@@ -2297,6 +2317,7 @@ void initServerConfig(void) {
server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC;
server.rdb_key_save_delay = CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY;
+ server.key_load_delay = CONFIG_DEFAULT_KEY_LOAD_DELAY;
server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE;
server.pidfile = NULL;
@@ -2368,7 +2389,7 @@ void initServerConfig(void) {
server.repl_state = REPL_STATE_NONE;
server.repl_transfer_tmpfile = NULL;
server.repl_transfer_fd = -1;
- server.repl_transfer_s = -1;
+ server.repl_transfer_s = NULL;
server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA;
server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY;
@@ -2765,6 +2786,11 @@ void initServer(void) {
server.clients_paused = 0;
server.system_memory_size = zmalloc_get_memory_size();
+ if (server.tls_port && tlsConfigure(&server.tls_ctx_config) == C_ERR) {
+ serverLog(LL_WARNING, "Failed to configure TLS. Check logs for more info.");
+ exit(1);
+ }
+
createSharedObjects();
adjustOpenFilesLimit();
server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
@@ -2780,6 +2806,9 @@ void initServer(void) {
if (server.port != 0 &&
listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
exit(1);
+ if (server.tls_port != 0 &&
+ listenToPort(server.tls_port,server.tlsfd,&server.tlsfd_count) == C_ERR)
+ exit(1);
/* Open the listening Unix domain socket. */
if (server.unixsocket != NULL) {
@@ -2794,7 +2823,7 @@ void initServer(void) {
}
/* Abort if there are no listening sockets at all. */
- if (server.ipfd_count == 0 && server.sofd < 0) {
+ if (server.ipfd_count == 0 && server.tlsfd_count == 0 && server.sofd < 0) {
serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
exit(1);
}
@@ -2820,6 +2849,11 @@ void initServer(void) {
server.aof_child_pid = -1;
server.module_child_pid = -1;
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
+ server.rdb_pipe_conns = NULL;
+ server.rdb_pipe_numconns = 0;
+ server.rdb_pipe_numconns_writing = 0;
+ server.rdb_pipe_buff = NULL;
+ server.rdb_pipe_bufflen = 0;
server.rdb_bgsave_scheduled = 0;
server.child_info_pipe[0] = -1;
server.child_info_pipe[1] = -1;
@@ -2866,6 +2900,14 @@ void initServer(void) {
"Unrecoverable error creating server.ipfd file event.");
}
}
+ for (j = 0; j < server.tlsfd_count; j++) {
+ if (aeCreateFileEvent(server.el, server.tlsfd[j], AE_READABLE,
+ acceptTLSHandler,NULL) == AE_ERR)
+ {
+ serverPanic(
+ "Unrecoverable error creating server.tlsfd file event.");
+ }
+ }
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
@@ -3570,6 +3612,7 @@ void closeListeningSockets(int unlink_unix_socket) {
int j;
for (j = 0; j < server.ipfd_count; j++) close(server.ipfd[j]);
+ for (j = 0; j < server.tlsfd_count; j++) close(server.tlsfd[j]);
if (server.sofd != -1) close(server.sofd);
if (server.cluster_enabled)
for (j = 0; j < server.cfd_count; j++) close(server.cfd[j]);
@@ -3940,7 +3983,7 @@ sds genRedisInfoString(char *section) {
#endif
(int64_t) getpid(),
server.runid,
- server.port,
+ server.port ? server.port : server.tls_port,
(int64_t)uptime,
(int64_t)(uptime/(3600*24)),
server.hz,
@@ -4324,7 +4367,7 @@ sds genRedisInfoString(char *section) {
long lag = 0;
if (slaveip[0] == '\0') {
- if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1)
+ if (connPeerToString(slave->conn,ip,sizeof(ip),&port) == -1)
continue;
slaveip = ip;
}
@@ -4578,7 +4621,7 @@ void redisAsciiArt(void) {
if (!show_logo) {
serverLog(LL_NOTICE,
"Running mode=%s, port=%d.",
- mode, server.port
+ mode, server.port ? server.port : server.tls_port
);
} else {
snprintf(buf,1024*16,ascii_logo,
@@ -4586,7 +4629,7 @@ void redisAsciiArt(void) {
redisGitSHA1(),
strtol(redisGitDirty(),NULL,10) > 0,
(sizeof(long) == 8) ? "64" : "32",
- mode, server.port,
+ mode, server.port ? server.port : server.tls_port,
(long) getpid()
);
serverLogRaw(LL_NOTICE|LL_RAW,buf);
@@ -4769,7 +4812,7 @@ void redisSetProcTitle(char *title) {
setproctitle("%s %s:%d%s",
title,
server.bindaddr_count ? server.bindaddr[0] : "*",
- server.port,
+ server.port ? server.port : server.tls_port,
server_mode);
#else
UNUSED(title);
@@ -4920,6 +4963,7 @@ int main(int argc, char **argv) {
ACLInit(); /* The ACL subsystem must be initialized ASAP because the
basic networking code and client creation depends on it. */
moduleInitModulesSystem();
+ tlsInit();
/* Store the executable path and arguments in a safe place in order
* to be able to restart the server later. */
@@ -5053,7 +5097,7 @@ int main(int argc, char **argv) {
exit(1);
}
}
- if (server.ipfd_count > 0)
+ if (server.ipfd_count > 0 || server.tlsfd_count > 0)
serverLog(LL_NOTICE,"Ready to accept connections");
if (server.sofd > 0)
serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
diff --git a/src/server.h b/src/server.h
index a14989237..00b54bd35 100644
--- a/src/server.h
+++ b/src/server.h
@@ -66,6 +66,7 @@ typedef long long mstime_t; /* millisecond time type. */
#include "quicklist.h" /* Lists are encoded as linked lists of
N-elements flat arrays */
#include "rax.h" /* Radix tree */
+#include "connection.h" /* Connection abstraction */
/* Following includes allow test functions to be called from Redis main() */
#include "zipmap.h"
@@ -84,6 +85,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_MAX_HZ 500
#define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
#define CONFIG_DEFAULT_SERVER_PORT 6379 /* TCP port. */
+#define CONFIG_DEFAULT_SERVER_TLS_PORT 0 /* TCP port. */
#define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. */
#define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */
#define CONFIG_DEFAULT_DBNUM 16
@@ -133,6 +135,7 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0
#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5
#define CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY 0
+#define CONFIG_DEFAULT_KEY_LOAD_DELAY 0
#define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1
#define CONFIG_DEFAULT_SLAVE_READ_ONLY 1
#define CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY 1
@@ -826,7 +829,7 @@ typedef struct user {
* Clients are taken in a linked list. */
typedef struct client {
uint64_t id; /* Client incremental unique ID. */
- int fd; /* Client socket. */
+ connection *conn;
int resp; /* RESP protocol version. Can be 2 or 3. */
redisDb *db; /* Pointer to currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
@@ -1035,6 +1038,22 @@ struct malloc_stats {
};
/*-----------------------------------------------------------------------------
+ * TLS Context Configuration
+ *----------------------------------------------------------------------------*/
+
+typedef struct redisTLSContextConfig {
+ char *cert_file; /* PEM certificate file; tlsConfigure() fails if NULL. */
+ char *key_file; /* PEM private key file; tlsConfigure() fails if NULL. */
+ char *dh_params_file; /* Optional PEM file holding DH parameters. */
+ char *ca_cert_file; /* CA certificate file; this or ca_cert_dir must be set. */
+ char *ca_cert_dir; /* CA certificates directory; this or ca_cert_file must be set. */
+ char *protocols; /* Space-separated TLS versions; NULL selects the built-in default. */
+ char *ciphers; /* Optional list passed to SSL_CTX_set_cipher_list(). */
+ char *ciphersuites; /* Optional list passed to SSL_CTX_set_ciphersuites() when TLS1_3_VERSION is defined. */
+ int prefer_server_ciphers; /* Non-zero enables SSL_OP_CIPHER_SERVER_PREFERENCE. */
+} redisTLSContextConfig;
+
+/*-----------------------------------------------------------------------------
* Global server state
*----------------------------------------------------------------------------*/
@@ -1088,6 +1107,7 @@ struct redisServer {
pid_t module_child_pid; /* PID of module child */
/* Networking */
int port; /* TCP listening port */
+ int tls_port; /* TLS listening port */
int tcp_backlog; /* TCP listen() backlog */
char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
@@ -1095,6 +1115,8 @@ struct redisServer {
mode_t unixsocketperm; /* UNIX socket permission */
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */
+ int tlsfd[CONFIG_BINDADDR_MAX]; /* TLS socket file descriptors */
+ int tlsfd_count; /* Used slots in tlsfd[] */
int sofd; /* Unix socket file descriptor */
int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
int cfd_count; /* Used slots in cfd[] */
@@ -1198,6 +1220,7 @@ struct redisServer {
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size. */
off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */
+ int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
pid_t aof_child_pid; /* PID if rewriting process */
list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
@@ -1243,10 +1266,17 @@ struct redisServer {
int rdb_child_type; /* Type of save by active child. */
int lastbgsave_status; /* C_OK or C_ERR */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
- int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
- int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
+ int rdb_pipe_write; /* RDB pipes used to transfer the rdb */
+ int rdb_pipe_read; /* data to the parent process in diskless repl. */
+ connection **rdb_pipe_conns; /* Connections which are currently the */
+ int rdb_pipe_numconns; /* target of diskless rdb fork child. */
+ int rdb_pipe_numconns_writing; /* Number of rdb conns with pending writes. */
+ char *rdb_pipe_buff; /* In diskless replication, this buffer holds data */
+ int rdb_pipe_bufflen; /* that was read from the the rdb pipe. */
int rdb_key_save_delay; /* Delay in microseconds between keys while
* writing the RDB. (for testings) */
+ int key_load_delay; /* Delay in microseconds between keys while
+ * loading aof or rdb. (for testings) */
/* Pipe and data structures for child -> parent info sharing. */
int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
struct {
@@ -1299,7 +1329,7 @@ struct redisServer {
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
- int repl_transfer_s; /* Slave -> Master SYNC socket */
+ connection *repl_transfer_s; /* Slave -> Master SYNC connection */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
@@ -1423,6 +1453,11 @@ struct redisServer {
int watchdog_period; /* Software watchdog period in ms. 0 = off */
/* System hardware info */
size_t system_memory_size; /* Total memory in system as reported by OS */
+ /* TLS Configuration */
+ int tls_cluster;
+ int tls_replication;
+ int tls_auth_clients;
+ redisTLSContextConfig tls_ctx_config;
};
typedef struct pubsubPattern {
@@ -1570,12 +1605,12 @@ size_t redisPopcount(void *s, long count);
void redisSetProcTitle(char *title);
/* networking.c -- Networking and Client related operations */
-client *createClient(int fd);
+client *createClient(connection *conn);
void closeTimedoutClients(void);
void freeClient(client *c);
void freeClientAsync(client *c);
void resetClient(client *c);
-void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void sendReplyToClient(connection *conn);
void *addReplyDeferredLen(client *c);
void setDeferredArrayLen(client *c, void *node, long length);
void setDeferredMapLen(client *c, void *node, long length);
@@ -1587,8 +1622,9 @@ void processInputBufferAndReplicate(client *c);
void processGopherRequest(client *c);
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
+void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask);
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
-void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
+void readQueryFromClient(connection *conn);
void addReplyNull(client *c);
void addReplyNullArray(client *c);
void addReplyBool(client *c, int b);
@@ -1646,7 +1682,7 @@ int handleClientsWithPendingReadsUsingThreads(void);
int stopThreadedIOIfNeeded(void);
int clientHasPendingReplies(client *c);
void unlinkClient(client *c);
-int writeToClient(int fd, client *c, int handler_installed);
+int writeToClient(client *c, int handler_installed);
void linkClient(client *c);
void protectClient(client *c);
void unprotectClient(client *c);
@@ -1782,6 +1818,8 @@ void clearReplicationId2(void);
void chopReplicationBacklog(void);
void replicationCacheMasterUsingMyself(void);
void feedReplicationBacklog(void *ptr, size_t len);
+void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
+void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
/* Generic persistence functions */
void startLoadingFile(FILE* fp, char* filename);
@@ -1954,6 +1992,7 @@ unsigned int LRU_CLOCK(void);
const char *evictPolicyToString(void);
struct redisMemOverhead *getMemoryOverheadData(void);
void freeMemoryOverheadData(struct redisMemOverhead *mh);
+void checkChildrenDone(void);
#define RESTART_SERVER_NONE 0
#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
@@ -2369,6 +2408,10 @@ void mixDigest(unsigned char *digest, void *ptr, size_t len);
void xorDigest(unsigned char *digest, void *ptr, size_t len);
int populateCommandTableParseFlags(struct redisCommand *c, char *strflags);
+/* TLS stuff */
+void tlsInit(void);
+int tlsConfigure(redisTLSContextConfig *ctx_config);
+
#define redisDebug(fmt, ...) \
printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
#define redisDebugMark() \
diff --git a/src/tls.c b/src/tls.c
new file mode 100644
index 000000000..5fac6902b
--- /dev/null
+++ b/src/tls.c
@@ -0,0 +1,808 @@
+/*
+ * Copyright (c) 2019, Redis Labs
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Redis nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include "server.h"
+#include "connhelpers.h"
+#include "adlist.h"
+
+#ifdef USE_OPENSSL
+
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/rand.h>
+
+#define REDIS_TLS_PROTO_TLSv1 (1<<0) /* Set by the "TLSv1" token of tls-protocols. */
+#define REDIS_TLS_PROTO_TLSv1_1 (1<<1) /* Set by the "TLSv1.1" token. */
+#define REDIS_TLS_PROTO_TLSv1_2 (1<<2) /* Set by the "TLSv1.2" token. */
+#define REDIS_TLS_PROTO_TLSv1_3 (1<<3) /* Set by the "TLSv1.3" token; requires TLS1_3_VERSION. */
+
+/* Use safe defaults: TLSv1.2, plus TLSv1.3 when OpenSSL defines TLS1_3_VERSION. */
+#ifdef TLS1_3_VERSION
+#define REDIS_TLS_PROTO_DEFAULT (REDIS_TLS_PROTO_TLSv1_2|REDIS_TLS_PROTO_TLSv1_3)
+#else
+#define REDIS_TLS_PROTO_DEFAULT (REDIS_TLS_PROTO_TLSv1_2)
+#endif
+
+extern ConnectionType CT_Socket;
+
+SSL_CTX *redis_tls_ctx;
+
+/* Convert a tls-protocols string (space-separated, case-insensitive names
+ * such as "TLSv1.2 TLSv1.3") into a bitmask of REDIS_TLS_PROTO_* flags.
+ *
+ * Returns REDIS_TLS_PROTO_DEFAULT when str is NULL, the resulting mask on
+ * success, or -1 (after logging a warning) when the string cannot be split,
+ * names an unknown protocol, or asks for TLSv1.3 on an OpenSSL build that
+ * does not define TLS1_3_VERSION. */
+static int parseProtocolsConfig(const char *str) {
+    int ntokens = 0, mask = 0, j;
+    sds *tokens;
+
+    if (str == NULL) return REDIS_TLS_PROTO_DEFAULT;
+
+    tokens = sdssplitlen(str, strlen(str), " ", 1, &ntokens);
+    if (tokens == NULL) {
+        serverLog(LL_WARNING, "Invalid tls-protocols configuration string");
+        return -1;
+    }
+
+    for (j = 0; j < ntokens; j++) {
+        const char *name = tokens[j];
+
+        if (strcasecmp(name, "tlsv1") == 0) {
+            mask |= REDIS_TLS_PROTO_TLSv1;
+            continue;
+        }
+        if (strcasecmp(name, "tlsv1.1") == 0) {
+            mask |= REDIS_TLS_PROTO_TLSv1_1;
+            continue;
+        }
+        if (strcasecmp(name, "tlsv1.2") == 0) {
+            mask |= REDIS_TLS_PROTO_TLSv1_2;
+            continue;
+        }
+        if (strcasecmp(name, "tlsv1.3") == 0) {
+#ifdef TLS1_3_VERSION
+            mask |= REDIS_TLS_PROTO_TLSv1_3;
+            continue;
+#else
+            serverLog(LL_WARNING, "TLSv1.3 is specified in tls-protocols but not supported by OpenSSL.");
+            mask = -1;
+            break;
+#endif
+        }
+
+        /* Anything else is not a protocol name we know about. */
+        serverLog(LL_WARNING, "Invalid tls-protocols specified. "
+                "Use a combination of 'TLSv1', 'TLSv1.1', 'TLSv1.2' and 'TLSv1.3'.");
+        mask = -1;
+        break;
+    }
+
+    /* The token array is released on both the success and the error path. */
+    sdsfreesplitres(tokens, ntokens);
+    return mask;
+}
+
+/* list of connections with pending data already read from the socket, but not
+ * served to the reader yet. */
+static list *pending_list = NULL;
+
+void tlsInit(void) {
+ ERR_load_crypto_strings();
+ SSL_load_error_strings();
+ SSL_library_init();
+
+ if (!RAND_poll()) {
+ serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator.");
+ }
+
+ pending_list = listCreate();
+
+ /* Server configuration */
+ server.tls_auth_clients = 1; /* Secure by default */
+}
+
+/* Attempt to configure/reconfigure TLS from ctx_config.
+ *
+ * A brand new SSL_CTX is built and it replaces the global redis_tls_ctx only
+ * after every step succeeded, so the operation is atomic: on failure the
+ * previously installed context (if any) is left unchanged.
+ *
+ * ctx_config->cert_file, ->key_file and at least one of ->ca_cert_file /
+ * ->ca_cert_dir are mandatory. ->dh_params_file, ->protocols, ->ciphers and
+ * ->ciphersuites are optional (NULL means not set).
+ *
+ * Returns C_OK on success, C_ERR on failure (the reason is logged at
+ * LL_WARNING level). */
+int tlsConfigure(redisTLSContextConfig *ctx_config) {
+    char errbuf[256];
+    SSL_CTX *ctx = NULL;
+
+    if (!ctx_config->cert_file) {
+        serverLog(LL_WARNING, "No tls-cert-file configured!");
+        goto error;
+    }
+
+    if (!ctx_config->key_file) {
+        serverLog(LL_WARNING, "No tls-key-file configured!");
+        goto error;
+    }
+
+    if (!ctx_config->ca_cert_file && !ctx_config->ca_cert_dir) {
+        serverLog(LL_WARNING, "Either tls-ca-cert-file or tls-ca-cert-dir must be configured!");
+        goto error;
+    }
+
+    ctx = SSL_CTX_new(SSLv23_method());
+    if (!ctx) {
+        /* Every call below dereferences ctx, so bail out right away. */
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to create TLS context: %s", errbuf);
+        goto error;
+    }
+
+    SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3);
+    SSL_CTX_set_options(ctx, SSL_OP_SINGLE_DH_USE);
+
+#ifdef SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS
+    SSL_CTX_set_options(ctx, SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS);
+#endif
+
+    int protocols = parseProtocolsConfig(ctx_config->protocols);
+    if (protocols == -1) goto error;
+
+    /* Every protocol version that was not requested is explicitly disabled. */
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1);
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_1))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1);
+#ifdef SSL_OP_NO_TLSv1_2
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_2))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_2);
+#endif
+#ifdef SSL_OP_NO_TLSv1_3
+    if (!(protocols & REDIS_TLS_PROTO_TLSv1_3))
+        SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_3);
+#endif
+
+#ifdef SSL_OP_NO_COMPRESSION
+    SSL_CTX_set_options(ctx, SSL_OP_NO_COMPRESSION);
+#endif
+
+#ifdef SSL_OP_NO_CLIENT_RENEGOTIATION
+    /* The option must be set on the context being built: there is no 'ssl'
+     * object in this function. */
+    SSL_CTX_set_options(ctx, SSL_OP_NO_CLIENT_RENEGOTIATION);
+#endif
+
+    if (ctx_config->prefer_server_ciphers)
+        SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
+
+    SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE|SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
+    /* Peer certificates are mandatory by default; connCreateAcceptedTLS()
+     * relaxes this per connection when authentication is not required. */
+    SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
+    SSL_CTX_set_ecdh_auto(ctx, 1);
+
+    if (SSL_CTX_use_certificate_file(ctx, ctx_config->cert_file, SSL_FILETYPE_PEM) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to load certificate: %s: %s", ctx_config->cert_file, errbuf);
+        goto error;
+    }
+
+    if (SSL_CTX_use_PrivateKey_file(ctx, ctx_config->key_file, SSL_FILETYPE_PEM) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to load private key: %s: %s", ctx_config->key_file, errbuf);
+        goto error;
+    }
+
+    if (SSL_CTX_load_verify_locations(ctx, ctx_config->ca_cert_file, ctx_config->ca_cert_dir) <= 0) {
+        ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+        serverLog(LL_WARNING, "Failed to configure CA certificate(s) file/directory: %s", errbuf);
+        goto error;
+    }
+
+    if (ctx_config->dh_params_file) {
+        FILE *dhfile = fopen(ctx_config->dh_params_file, "r");
+        DH *dh = NULL;
+        if (!dhfile) {
+            serverLog(LL_WARNING, "Failed to load %s: %s", ctx_config->dh_params_file, strerror(errno));
+            goto error;
+        }
+
+        dh = PEM_read_DHparams(dhfile, NULL, NULL, NULL);
+        fclose(dhfile);
+        if (!dh) {
+            serverLog(LL_WARNING, "%s: failed to read DH params.", ctx_config->dh_params_file);
+            goto error;
+        }
+
+        if (SSL_CTX_set_tmp_dh(ctx, dh) <= 0) {
+            ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf));
+            serverLog(LL_WARNING, "Failed to load DH params file: %s: %s", ctx_config->dh_params_file, errbuf);
+            DH_free(dh);
+            goto error;
+        }
+
+        /* Our reference to dh is released on the success path as well. */
+        DH_free(dh);
+    }
+
+    if (ctx_config->ciphers && !SSL_CTX_set_cipher_list(ctx, ctx_config->ciphers)) {
+        serverLog(LL_WARNING, "Failed to configure ciphers: %s", ctx_config->ciphers);
+        goto error;
+    }
+
+#ifdef TLS1_3_VERSION
+    if (ctx_config->ciphersuites && !SSL_CTX_set_ciphersuites(ctx, ctx_config->ciphersuites)) {
+        serverLog(LL_WARNING, "Failed to configure ciphersuites: %s", ctx_config->ciphersuites);
+        goto error;
+    }
+#endif
+
+    /* Everything succeeded: swap in the new context. redis_tls_ctx is NULL
+     * the first time, which SSL_CTX_free() accepts. */
+    SSL_CTX_free(redis_tls_ctx);
+    redis_tls_ctx = ctx;
+
+    return C_OK;
+
+error:
+    if (ctx) SSL_CTX_free(ctx);
+    return C_ERR;
+}
+
+/* Debug tracing for the TLS connection code. When TLS_DEBUGGING is defined
+ * the macro forwards to serverLog() at LL_DEBUG with a "TLSCONN: " prefix,
+ * otherwise it expands to nothing. At least one variadic argument must be
+ * supplied, since __VA_ARGS__ follows a comma in the expansion. */
+#ifdef TLS_DEBUGGING
+#define TLSCONN_DEBUG(fmt, ...) \
+ serverLog(LL_DEBUG, "TLSCONN: " fmt, __VA_ARGS__)
+#else
+#define TLSCONN_DEBUG(fmt, ...)
+#endif
+
+ConnectionType CT_TLS;
+
+/* Normal socket connections have a simple events/handler correlation.
+ *
+ * With TLS connections we need to handle cases where during a logical read
+ * or write operation, the SSL library asks to block for the opposite
+ * socket operation.
+ *
+ * When this happens, we need to do two things:
+ * 1. Make sure we register for the event.
+ * 2. Make sure we know which handler needs to execute when the
+ * event fires. That is, if we notify the caller of a write operation
+ * that it blocks, and SSL asks for a read, we need to trigger the
+ * write handler again on the next read event.
+ *
+ */
+
+/* Socket event OpenSSL asked us to wait for before retrying an operation.
+ * Filled in by handleSSLReturnCode() on SSL_ERROR_WANT_READ/WANT_WRITE.
+ * Values start at 1, so callers can initialize a variable to 0 to mean
+ * "nothing requested yet". */
+typedef enum {
+ WANT_READ = 1,
+ WANT_WRITE
+} WantIOType;
+
+/* Bits stored in tls_connection.flags. */
+#define TLS_CONN_FLAG_READ_WANT_WRITE (1<<0) /* SSL_read() asked for a writable socket. */
+#define TLS_CONN_FLAG_WRITE_WANT_READ (1<<1) /* SSL_write() asked for a readable socket. */
+#define TLS_CONN_FLAG_FD_SET (1<<2) /* SSL_set_fd() already done on the connect path. */
+
+/* TLS flavour of a connection. The generic connection is the first member,
+ * which is what allows the code in this file to cast between
+ * (connection *) and (tls_connection *). */
+typedef struct tls_connection {
+ connection c; /* Generic connection state; must stay first. */
+ int flags; /* TLS_CONN_FLAG_* bits. */
+ SSL *ssl; /* OpenSSL handle created in connCreateTLS(). */
+ char *ssl_error; /* Heap-allocated text of the last error, or NULL. */
+ listNode *pending_list_node; /* Our node in pending_list, or NULL if not queued. */
+} tls_connection;
+
+/* Allocate a zeroed TLS connection bound to the CT_TLS connection type,
+ * with no file descriptor yet (fd = -1) and a fresh SSL handle created from
+ * the global redis_tls_ctx. Returns it as a generic connection pointer. */
+connection *connCreateTLS(void) {
+    tls_connection *tc = zcalloc(sizeof(*tc));
+
+    tc->ssl = SSL_new(redis_tls_ctx);
+    tc->c.fd = -1;
+    tc->c.type = &CT_TLS;
+
+    /* 'c' is the first member, so this is the same address as 'tc'. */
+    return &tc->c;
+}
+
+/* Create a TLS connection for an already accepted socket 'fd'.
+ *
+ * The connection is placed in CONN_STATE_ACCEPTING and its SSL handle is put
+ * in accept (server) state; the TLS handshake itself is performed later.
+ * When 'require_auth' is zero the verify mode is overridden with
+ * SSL_VERIFY_PEER only: client certificates are still verified if provided,
+ * but they are not required. */
+connection *connCreateAcceptedTLS(int fd, int require_auth) {
+    tls_connection *tc = (tls_connection *) connCreateTLS();
+
+    tc->c.state = CONN_STATE_ACCEPTING;
+    tc->c.fd = fd;
+
+    if (require_auth == 0)
+        SSL_set_verify(tc->ssl, SSL_VERIFY_PEER, NULL);
+
+    SSL_set_fd(tc->ssl, fd);
+    SSL_set_accept_state(tc->ssl);
+
+    return (connection *) tc;
+}
+
+static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask);
+
+/* Process the return code received from OpenSSL.
+ * Update the want parameter with expected I/O.
+ * Update the connection's error state if a real error has occurred.
+ *
+ * Returns 0 if no further handling is required: either ret_value was > 0,
+ * or OpenSSL just wants to wait for a socket event, in which case *want is
+ * set to WANT_READ or WANT_WRITE. Otherwise returns the SSL error code,
+ * after storing the error in conn->c.last_errno and conn->ssl_error.
+ *
+ * For SSL_ERROR_SYSCALL, errno on return equals conn->c.last_errno, so
+ * callers may keep inspecting errno to tell EOF (0) from a real error.
+ */
+static int handleSSLReturnCode(tls_connection *conn, int ret_value, WantIOType *want) {
+    if (ret_value <= 0) {
+        /* Snapshot errno before calling anything else: the library calls
+         * below (zfree, zstrdup, strerror) are allowed to modify it. */
+        int saved_errno = errno;
+        int ssl_err = SSL_get_error(conn->ssl, ret_value);
+        switch (ssl_err) {
+        case SSL_ERROR_WANT_WRITE:
+            *want = WANT_WRITE;
+            return 0;
+        case SSL_ERROR_WANT_READ:
+            *want = WANT_READ;
+            return 0;
+        case SSL_ERROR_SYSCALL:
+            conn->c.last_errno = saved_errno;
+            if (conn->ssl_error) zfree(conn->ssl_error);
+            conn->ssl_error = saved_errno ? zstrdup(strerror(saved_errno)) : NULL;
+            /* Hand the original errno back to the caller. */
+            errno = saved_errno;
+            break;
+        default:
+            /* Error! */
+            conn->c.last_errno = 0;
+            if (conn->ssl_error) zfree(conn->ssl_error);
+            conn->ssl_error = zmalloc(512);
+            ERR_error_string_n(ERR_get_error(), conn->ssl_error, 512);
+            break;
+        }
+
+        return ssl_err;
+    }
+
+    return 0;
+}
+
+/* Register with the event loop for exactly the event OpenSSL asked for.
+ *
+ * The opposite event is removed if currently installed, and the wanted one
+ * is installed (with tlsEventHandler) if missing. Any 'want' value other
+ * than WANT_READ / WANT_WRITE trips an assertion. */
+void registerSSLEvent(tls_connection *conn, WantIOType want) {
+    int installed = aeGetFileEvents(server.el, conn->c.fd);
+    int wanted, opposite;
+
+    if (want == WANT_READ) {
+        wanted = AE_READABLE;
+        opposite = AE_WRITABLE;
+    } else if (want == WANT_WRITE) {
+        wanted = AE_WRITABLE;
+        opposite = AE_READABLE;
+    } else {
+        serverAssert(0);
+        return;
+    }
+
+    if (installed & opposite)
+        aeDeleteFileEvent(server.el, conn->c.fd, opposite);
+    if (!(installed & wanted))
+        aeCreateFileEvent(server.el, conn->c.fd, wanted, tlsEventHandler, conn);
+}
+
+/* Bring the event loop registration in line with what the connection needs.
+ *
+ * Readable events are needed when a read handler is set or a write is
+ * waiting for a read (WRITE_WANT_READ); writable events are needed when a
+ * write handler is set or a read is waiting for a write (READ_WANT_WRITE).
+ * Events are only created or deleted when the current mask differs. */
+void updateSSLEvent(tls_connection *conn) {
+    int fd = conn->c.fd;
+    int installed = aeGetFileEvents(server.el, fd);
+    int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
+    int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
+
+    if (need_read) {
+        if (!(installed & AE_READABLE))
+            aeCreateFileEvent(server.el, fd, AE_READABLE, tlsEventHandler, conn);
+    } else if (installed & AE_READABLE) {
+        aeDeleteFileEvent(server.el, fd, AE_READABLE);
+    }
+
+    if (need_write) {
+        if (!(installed & AE_WRITABLE))
+            aeCreateFileEvent(server.el, fd, AE_WRITABLE, tlsEventHandler, conn);
+    } else if (installed & AE_WRITABLE) {
+        aeDeleteFileEvent(server.el, fd, AE_WRITABLE);
+    }
+}
+
+/* Drive a TLS connection after a socket event (or pending buffered data).
+ *
+ * 'mask' is the set of AE_READABLE / AE_WRITABLE events that fired.
+ * Depending on the connection state this continues the outgoing handshake
+ * (CONNECTING), continues the incoming handshake (ACCEPTING), or dispatches
+ * the read/write handlers (CONNECTED). Whenever callHandler() returns 0 the
+ * function returns immediately without touching 'conn' again.
+ */
+static void tlsHandleEvent(tls_connection *conn, int mask) {
+    int ret, conn_error;
+
+    /* Note: there is no 'fd' variable in this function, so the descriptor
+     * must be taken from the connection for TLS_DEBUGGING builds to compile. */
+    TLSCONN_DEBUG("tlsEventHandler(): fd=%d, state=%d, mask=%d, r=%d, w=%d, flags=%d",
+            conn->c.fd, conn->c.state, mask, conn->c.read_handler != NULL, conn->c.write_handler != NULL,
+            conn->flags);
+
+    ERR_clear_error();
+
+    switch (conn->c.state) {
+        case CONN_STATE_CONNECTING:
+            /* Record the error reported for the socket itself rather than
+             * whatever errno happens to hold at this point.
+             * NOTE(review): assumes connGetSocketError() returns the pending
+             * socket error code (0 when none) -- confirm in connection.c. */
+            conn_error = connGetSocketError((connection *) conn);
+            if (conn_error) {
+                conn->c.last_errno = conn_error;
+                conn->c.state = CONN_STATE_ERROR;
+            } else {
+                if (!(conn->flags & TLS_CONN_FLAG_FD_SET)) {
+                    SSL_set_fd(conn->ssl, conn->c.fd);
+                    conn->flags |= TLS_CONN_FLAG_FD_SET;
+                }
+                ret = SSL_connect(conn->ssl);
+                if (ret <= 0) {
+                    WantIOType want = 0;
+                    if (!handleSSLReturnCode(conn, ret, &want)) {
+                        registerSSLEvent(conn, want);
+
+                        /* Avoid hitting updateSSLEvent(), which knows nothing
+                         * of what SSL_connect() wants and instead looks at our
+                         * R/W handlers.
+                         */
+                        return;
+                    }
+
+                    /* If not handled, it's an error */
+                    conn->c.state = CONN_STATE_ERROR;
+                } else {
+                    conn->c.state = CONN_STATE_CONNECTED;
+                }
+            }
+
+            if (!callHandler((connection *) conn, conn->c.conn_handler)) return;
+            conn->c.conn_handler = NULL;
+            break;
+        case CONN_STATE_ACCEPTING:
+            ret = SSL_accept(conn->ssl);
+            if (ret <= 0) {
+                WantIOType want = 0;
+                if (!handleSSLReturnCode(conn, ret, &want)) {
+                    /* Avoid hitting updateSSLEvent(), which knows nothing
+                     * of what SSL_accept() wants and instead looks at our
+                     * R/W handlers.
+                     */
+                    registerSSLEvent(conn, want);
+                    return;
+                }
+
+                /* If not handled, it's an error */
+                conn->c.state = CONN_STATE_ERROR;
+            } else {
+                conn->c.state = CONN_STATE_CONNECTED;
+            }
+
+            if (!callHandler((connection *) conn, conn->c.conn_handler)) return;
+            conn->c.conn_handler = NULL;
+            break;
+        case CONN_STATE_CONNECTED:
+        {
+            /* A handler runs either because its own event fired, or because
+             * the opposite event fired while the SSL layer was waiting on it
+             * to complete that handler's operation. */
+            int call_read = ((mask & AE_READABLE) && conn->c.read_handler) ||
+                ((mask & AE_WRITABLE) && (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE));
+            int call_write = ((mask & AE_WRITABLE) && conn->c.write_handler) ||
+                ((mask & AE_READABLE) && (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ));
+
+            /* Normally we execute the readable event first, and the writable
+             * event later. This is useful as sometimes we may be able
+             * to serve the reply of a query immediately after processing the
+             * query.
+             *
+             * However if WRITE_BARRIER is set in the mask, our application is
+             * asking us to do the reverse: never fire the writable event
+             * after the readable. In such a case, we invert the calls.
+             * This is useful when, for instance, we want to do things
+             * in the beforeSleep() hook, like fsynching a file to disk,
+             * before replying to a client. */
+            int invert = conn->c.flags & CONN_FLAG_WRITE_BARRIER;
+
+            if (!invert && call_read) {
+                conn->flags &= ~TLS_CONN_FLAG_READ_WANT_WRITE;
+                if (!callHandler((connection *) conn, conn->c.read_handler)) return;
+            }
+
+            /* Fire the writable event. */
+            if (call_write) {
+                conn->flags &= ~TLS_CONN_FLAG_WRITE_WANT_READ;
+                if (!callHandler((connection *) conn, conn->c.write_handler)) return;
+            }
+
+            /* If we have to invert the call, fire the readable event now
+             * after the writable one. */
+            if (invert && call_read) {
+                conn->flags &= ~TLS_CONN_FLAG_READ_WANT_WRITE;
+                if (!callHandler((connection *) conn, conn->c.read_handler)) return;
+            }
+
+            /* If SSL has pending data that was already read from the socket,
+             * we're at risk of not calling the read handler again; make sure
+             * to add the connection to a list of pending connections that
+             * should be handled anyway. */
+            if ((mask & AE_READABLE)) {
+                if (SSL_pending(conn->ssl) > 0) {
+                    if (!conn->pending_list_node) {
+                        listAddNodeTail(pending_list, conn);
+                        conn->pending_list_node = listLast(pending_list);
+                    }
+                } else if (conn->pending_list_node) {
+                    listDelNode(pending_list, conn->pending_list_node);
+                    conn->pending_list_node = NULL;
+                }
+            }
+
+            break;
+        }
+        default:
+            break;
+    }
+
+    updateSSLEvent(conn);
+}
+
+/* Event loop callback installed for TLS sockets: 'clientData' is the
+ * tls_connection the event was registered for. The loop and fd arguments
+ * are not used; everything is delegated to tlsHandleEvent(). */
+static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
+    UNUSED(el);
+    UNUSED(fd);
+    tlsHandleEvent((tls_connection *) clientData, mask);
+}
+
+/* Release everything TLS-specific held by the connection (its slot in the
+ * pending list, the stored error text, the SSL handle) and then hand the
+ * connection over to the plain socket close implementation. */
+static void connTLSClose(connection *conn_) {
+    tls_connection *tc = (tls_connection *) conn_;
+
+    if (tc->pending_list_node != NULL) {
+        listDelNode(pending_list, tc->pending_list_node);
+        tc->pending_list_node = NULL;
+    }
+
+    if (tc->ssl_error != NULL) {
+        zfree(tc->ssl_error);
+        tc->ssl_error = NULL;
+    }
+
+    if (tc->ssl != NULL) {
+        SSL_free(tc->ssl);
+        tc->ssl = NULL;
+    }
+
+    CT_Socket.close(conn_);
+}
+
+/* Start the server side TLS handshake on a connection in ACCEPTING state.
+ *
+ * 'accept_handler' is stored as the connection handler. If the handshake
+ * completes right away the connection becomes CONNECTED and the handler is
+ * invoked immediately; if OpenSSL needs more I/O the wanted event is
+ * registered and the handshake continues from the event handler.
+ *
+ * Returns C_ERR if the connection is not in ACCEPTING state or the handshake
+ * failed (state is then set to CONN_STATE_ERROR), C_OK otherwise. */
+static int connTLSAccept(connection *_conn, ConnectionCallbackFunc accept_handler) {
+    tls_connection *tc = (tls_connection *) _conn;
+    WantIOType want = 0;
+    int rc;
+
+    if (tc->c.state != CONN_STATE_ACCEPTING) return C_ERR;
+    ERR_clear_error();
+
+    /* Try to accept */
+    tc->c.conn_handler = accept_handler;
+    rc = SSL_accept(tc->ssl);
+
+    if (rc > 0) {
+        tc->c.state = CONN_STATE_CONNECTED;
+        /* Only touch the connection again if the handler reports that it
+         * is still usable. */
+        if (callHandler((connection *) tc, tc->c.conn_handler))
+            tc->c.conn_handler = NULL;
+        return C_OK;
+    }
+
+    if (handleSSLReturnCode(tc, rc, &want)) {
+        tc->c.state = CONN_STATE_ERROR;
+        return C_ERR;
+    }
+
+    registerSSLEvent(tc, want); /* We'll fire back */
+    return C_OK;
+}
+
+/* Start a non-blocking outgoing connection.
+ *
+ * Only the socket level connect is initiated here, through the plain socket
+ * connection type; once the socket is connected the TLS handshake is
+ * initiated from the event handler. Returns C_ERR if the connection is not
+ * in CONN_STATE_NONE or the socket connect fails, C_OK otherwise. */
+static int connTLSConnect(connection *conn_, const char *addr, int port, const char *src_addr, ConnectionCallbackFunc connect_handler) {
+    if (((tls_connection *) conn_)->c.state != CONN_STATE_NONE) return C_ERR;
+    ERR_clear_error();
+
+    /* Initiate Socket connection first */
+    return CT_Socket.connect(conn_, addr, port, src_addr, connect_handler) == C_ERR ?
+        C_ERR : C_OK;
+}
+
+/* Write up to 'data_len' bytes through the TLS layer.
+ *
+ * Returns the value of SSL_write() when positive. Returns -1 with errno set
+ * to EAGAIN when OpenSSL needs to wait for socket I/O (if it wants to read,
+ * WRITE_WANT_READ is flagged so the write handler is retried on the next
+ * readable event). Returns 0 and moves to CONN_STATE_CLOSED on a TLS
+ * shutdown or on a syscall failure with no error code; returns -1 and moves
+ * to CONN_STATE_ERROR on any other failure. Also returns -1 if the
+ * connection is not in CONN_STATE_CONNECTED. */
+static int connTLSWrite(connection *conn_, const void *data, size_t data_len) {
+    tls_connection *conn = (tls_connection *) conn_;
+    int ret, ssl_err;
+
+    if (conn->c.state != CONN_STATE_CONNECTED) return -1;
+    ERR_clear_error();
+    ret = SSL_write(conn->ssl, data, data_len);
+
+    if (ret <= 0) {
+        WantIOType want = 0;
+        if (!(ssl_err = handleSSLReturnCode(conn, ret, &want))) {
+            if (want == WANT_READ) conn->flags |= TLS_CONN_FLAG_WRITE_WANT_READ;
+            updateSSLEvent(conn);
+            errno = EAGAIN;
+            return -1;
+        } else {
+            /* Use the errno value saved by handleSSLReturnCode() for
+             * SSL_ERROR_SYSCALL: errno itself may have been modified by the
+             * library calls made while recording the error. */
+            if (ssl_err == SSL_ERROR_ZERO_RETURN ||
+                    ((ssl_err == SSL_ERROR_SYSCALL && !conn->c.last_errno))) {
+                conn->c.state = CONN_STATE_CLOSED;
+                return 0;
+            } else {
+                conn->c.state = CONN_STATE_ERROR;
+                return -1;
+            }
+        }
+    }
+
+    return ret;
+}
+
+/* Read up to 'buf_len' bytes through the TLS layer.
+ *
+ * Returns the value of SSL_read() when positive. Returns -1 with errno set
+ * to EAGAIN when OpenSSL needs to wait for socket I/O (if it wants to
+ * write, READ_WANT_WRITE is flagged so the read handler is retried on the
+ * next writable event). Returns 0 and moves to CONN_STATE_CLOSED on a TLS
+ * shutdown or on a syscall failure with no error code; returns -1 and moves
+ * to CONN_STATE_ERROR on any other failure. Also returns -1 if the
+ * connection is not in CONN_STATE_CONNECTED. */
+static int connTLSRead(connection *conn_, void *buf, size_t buf_len) {
+    tls_connection *conn = (tls_connection *) conn_;
+    int ret;
+    int ssl_err;
+
+    if (conn->c.state != CONN_STATE_CONNECTED) return -1;
+    ERR_clear_error();
+    ret = SSL_read(conn->ssl, buf, buf_len);
+    if (ret <= 0) {
+        WantIOType want = 0;
+        if (!(ssl_err = handleSSLReturnCode(conn, ret, &want))) {
+            if (want == WANT_WRITE) conn->flags |= TLS_CONN_FLAG_READ_WANT_WRITE;
+            updateSSLEvent(conn);
+
+            errno = EAGAIN;
+            return -1;
+        } else {
+            /* Use the errno value saved by handleSSLReturnCode() for
+             * SSL_ERROR_SYSCALL: errno itself may have been modified by the
+             * library calls made while recording the error. */
+            if (ssl_err == SSL_ERROR_ZERO_RETURN ||
+                    ((ssl_err == SSL_ERROR_SYSCALL) && !conn->c.last_errno)) {
+                conn->c.state = CONN_STATE_CLOSED;
+                return 0;
+            } else {
+                conn->c.state = CONN_STATE_ERROR;
+                return -1;
+            }
+        }
+    }
+
+    return ret;
+}
+
+static const char *connTLSGetLastError(connection *conn_) {
+ tls_connection *conn = (tls_connection *) conn_;
+
+ if (conn->ssl_error) return conn->ssl_error;
+ return NULL;
+}
+
+/* Install (or, with a NULL 'func', remove) the write handler.
+ *
+ * A non-zero 'barrier' sets CONN_FLAG_WRITE_BARRIER on the connection and a
+ * zero value clears it. The event loop registration is refreshed
+ * afterwards. Always returns C_OK. */
+int connTLSSetWriteHandler(connection *conn, ConnectionCallbackFunc func, int barrier) {
+    if (barrier) {
+        conn->flags |= CONN_FLAG_WRITE_BARRIER;
+    } else {
+        conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
+    }
+    conn->write_handler = func;
+
+    updateSSLEvent((tls_connection *) conn);
+    return C_OK;
+}
+
+/* Install (or, with a NULL 'func', remove) the read handler and refresh the
+ * event loop registration accordingly. Always returns C_OK. */
+int connTLSSetReadHandler(connection *conn, ConnectionCallbackFunc func) {
+    tls_connection *tc = (tls_connection *) conn;
+
+    tc->c.read_handler = func;
+    updateSSLEvent(tc);
+    return C_OK;
+}
+
+/* Switch the socket to blocking mode and apply 'timeout' as both its send
+ * and its receive timeout. Failures of the anet calls are not reported. */
+static void setBlockingTimeout(tls_connection *conn, long long timeout) {
+    int fd = conn->c.fd;
+
+    anetBlock(NULL, fd);
+    anetSendTimeout(NULL, fd, timeout);
+    anetRecvTimeout(NULL, fd, timeout);
+}
+
+/* Undo setBlockingTimeout(): put the socket back in non-blocking mode and
+ * reset both the send and the receive timeout to 0. */
+static void unsetBlockingTimeout(tls_connection *conn) {
+    int fd = conn->c.fd;
+
+    anetNonBlock(NULL, fd);
+    anetSendTimeout(NULL, fd, 0);
+    anetRecvTimeout(NULL, fd, 0);
+}
+
+/* Connect and complete the TLS handshake synchronously.
+ *
+ * The socket is connected through the plain socket connection type first,
+ * then the handshake runs with the socket in blocking mode. Returns C_ERR
+ * if the connection is not in CONN_STATE_NONE, if the socket connect fails,
+ * or if the handshake fails (the state is then CONN_STATE_ERROR and the
+ * socket is left in blocking mode); on success the socket is switched back
+ * to non-blocking mode, the state becomes CONN_STATE_CONNECTED and C_OK is
+ * returned. */
+static int connTLSBlockingConnect(connection *conn_, const char *addr, int port, long long timeout) {
+    tls_connection *tc = (tls_connection *) conn_;
+
+    if (tc->c.state != CONN_STATE_NONE) return C_ERR;
+
+    /* Initiate socket blocking connect first */
+    if (CT_Socket.blocking_connect(conn_, addr, port, timeout) == C_ERR) return C_ERR;
+
+    /* Initiate TLS connection now. We set up a send/recv timeout on the socket,
+     * which means the specified timeout will not be enforced accurately. */
+    SSL_set_fd(tc->ssl, tc->c.fd);
+    setBlockingTimeout(tc, timeout);
+
+    if (SSL_connect(tc->ssl) <= 0) {
+        tc->c.state = CONN_STATE_ERROR;
+        return C_ERR;
+    }
+
+    unsetBlockingTimeout(tc);
+    tc->c.state = CONN_STATE_CONNECTED;
+    return C_OK;
+}
+
+/* Blocking write of 'size' bytes.
+ *
+ * The socket is temporarily made blocking with 'timeout' applied, and
+ * SSL_MODE_ENABLE_PARTIAL_WRITE is cleared around the call and set again
+ * afterwards. Returns the value of SSL_write() unchanged. */
+static ssize_t connTLSSyncWrite(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *tc = (tls_connection *) conn_;
+    int written;
+
+    setBlockingTimeout(tc, timeout);
+
+    SSL_clear_mode(tc->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+    written = SSL_write(tc->ssl, ptr, size);
+    SSL_set_mode(tc->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+
+    unsetBlockingTimeout(tc);
+    return written;
+}
+
+/* Blocking read of up to 'size' bytes: the socket is temporarily made
+ * blocking with 'timeout' applied around a single SSL_read() call, whose
+ * return value is passed back unchanged. */
+static ssize_t connTLSSyncRead(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *tc = (tls_connection *) conn_;
+    int nread;
+
+    setBlockingTimeout(tc, timeout);
+    nread = SSL_read(tc->ssl, ptr, size);
+    unsetBlockingTimeout(tc);
+
+    return nread;
+}
+
+/* Blocking read of a single line into 'ptr', a buffer of 'size' bytes.
+ *
+ * Bytes are read one at a time until a '\n' is seen or the buffer is full
+ * (one byte is reserved for the terminating NUL). The '\n' is not stored
+ * and a '\r' immediately before it is replaced by NUL.
+ *
+ * Returns the number of bytes stored before the newline (a stripped '\r'
+ * is still counted), or -1 if SSL_read() fails. With 'size' <= 1 nothing is
+ * read or written and 0 is returned. */
+static ssize_t connTLSSyncReadLine(connection *conn_, char *ptr, ssize_t size, long long timeout) {
+    tls_connection *conn = (tls_connection *) conn_;
+    ssize_t nread = 0;
+
+    setBlockingTimeout(conn, timeout);
+
+    /* Reserve room for the NUL terminator. The loop condition must be
+     * 'size > 0' rather than 'size != 0': with size <= 0 on entry the
+     * counter is already negative here and would never reach zero, writing
+     * past the end of the buffer. */
+    size--;
+    while(size > 0) {
+        char c;
+
+        if (SSL_read(conn->ssl,&c,1) <= 0) {
+            nread = -1;
+            goto exit;
+        }
+        if (c == '\n') {
+            *ptr = '\0';
+            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
+            goto exit;
+        } else {
+            *ptr++ = c;
+            *ptr = '\0';
+            nread++;
+        }
+        size--;
+    }
+exit:
+    unsetBlockingTimeout(conn);
+    return nread;
+}
+
+/* Connection type descriptor for TLS connections: binds each generic
+ * connection operation to its TLS implementation defined in this file.
+ * connCreateTLS() points every new TLS connection at this table. */
+ConnectionType CT_TLS = {
+ .ae_handler = tlsEventHandler,
+ .accept = connTLSAccept,
+ .connect = connTLSConnect,
+ .blocking_connect = connTLSBlockingConnect,
+ .read = connTLSRead,
+ .write = connTLSWrite,
+ .close = connTLSClose,
+ .set_write_handler = connTLSSetWriteHandler,
+ .set_read_handler = connTLSSetReadHandler,
+ .get_last_error = connTLSGetLastError,
+ .sync_write = connTLSSyncWrite,
+ .sync_read = connTLSSyncRead,
+ .sync_readline = connTLSSyncReadLine,
+};
+
+/* Return non-zero when at least one TLS connection is queued in
+ * pending_list, i.e. has data buffered inside OpenSSL that still has to be
+ * delivered to its read handler. Returns 0 if the list was never created. */
+int tlsHasPendingData() {
+    return pending_list != NULL && listLength(pending_list) > 0;
+}
+
+/* Run the event logic, as if a readable event had fired, for every
+ * connection queued in pending_list. These are connections for which
+ * OpenSSL already holds decrypted data, so the socket itself may never
+ * become readable again. Does nothing if the list was never created,
+ * mirroring the check in tlsHasPendingData(). */
+void tlsProcessPendingData() {
+    listIter li;
+    listNode *ln;
+
+    if (!pending_list) return;
+
+    listRewind(pending_list,&li);
+    while((ln = listNext(&li))) {
+        tls_connection *conn = listNodeValue(ln);
+        tlsHandleEvent(conn, AE_READABLE);
+    }
+}
+
+#else /* USE_OPENSSL */
+
+/* Stubs used when Redis is built without OpenSSL (USE_OPENSSL undefined).
+ * They keep the TLS entry points linkable while doing nothing. */
+
+/* Nothing to initialize without TLS support. */
+void tlsInit(void) {
+}
+
+/* Ignores the configuration and reports success. */
+int tlsConfigure(redisTLSContextConfig *ctx_config) {
+ UNUSED(ctx_config);
+ return C_OK;
+}
+
+/* TLS connections cannot be created in this build: always NULL. */
+connection *connCreateTLS(void) {
+ return NULL;
+}
+
+/* TLS connections cannot be created in this build: always NULL. */
+connection *connCreateAcceptedTLS(int fd, int require_auth) {
+ UNUSED(fd);
+ UNUSED(require_auth);
+
+ return NULL;
+}
+
+/* There is never pending TLS data in this build. */
+int tlsHasPendingData() {
+ return 0;
+}
+
+/* Nothing to process in this build. */
+void tlsProcessPendingData() {
+}
+
+#endif
diff --git a/tests/cluster/run.tcl b/tests/cluster/run.tcl
index 93603ddc9..d9a7d7ee5 100644
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@@ -8,6 +8,7 @@ source ../instances.tcl
source ../../support/cluster.tcl ; # Redis Cluster client.
set ::instances_count 20 ; # How many instances we use at max.
+set ::tlsdir "../../tls"
proc main {} {
parse_options
diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl
index 68fba135e..33f861dc5 100644
--- a/tests/cluster/tests/04-resharding.tcl
+++ b/tests/cluster/tests/04-resharding.tcl
@@ -4,6 +4,7 @@
# are preseved across iterations.
source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
test "Create a 5 nodes cluster" {
create_cluster 5 5
@@ -79,6 +80,7 @@ test "Cluster consistency during live resharding" {
--cluster-to $target \
--cluster-slots 100 \
--cluster-yes \
+ {*}[rediscli_tls_config "../../../tests"] \
| [info nameofexecutable] \
../tests/helpers/onlydots.tcl \
&] 0]
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
index 3d8b7b04b..dd18a979a 100644
--- a/tests/cluster/tests/12-replica-migration-2.tcl
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -5,6 +5,7 @@
# other masters have slaves.
source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
# Create a cluster with 5 master and 15 slaves, to make sure there are no
# empty masters and make rebalancing simpler to handle during the test.
@@ -33,7 +34,9 @@ test "Resharding all the master #0 slots away from it" {
set output [exec \
../../../src/redis-cli --cluster rebalance \
127.0.0.1:[get_instance_attrib redis 0 port] \
+ {*}[rediscli_tls_config "../../../tests"] \
--cluster-weight ${master0_id}=0 >@ stdout ]
+
}
test "Master #0 should lose its replicas" {
@@ -51,6 +54,7 @@ test "Resharding back some slot to master #0" {
set output [exec \
../../../src/redis-cli --cluster rebalance \
127.0.0.1:[get_instance_attrib redis 0 port] \
+ {*}[rediscli_tls_config "../../../tests"] \
--cluster-weight ${master0_id}=.01 \
--cluster-use-empty-masters >@ stdout]
}
diff --git a/tests/helpers/bg_block_op.tcl b/tests/helpers/bg_block_op.tcl
index 238d3874f..c8b323308 100644
--- a/tests/helpers/bg_block_op.tcl
+++ b/tests/helpers/bg_block_op.tcl
@@ -1,6 +1,8 @@
source tests/support/redis.tcl
source tests/support/util.tcl
+set ::tlsdir "tests/tls"
+
# This function sometimes writes sometimes blocking-reads from lists/sorted
# sets. There are multiple processes like this executing at the same time
# so that we have some chance to trap some corner condition if there is
@@ -8,8 +10,8 @@ source tests/support/util.tcl
# space to just a few elements, and balance the operations so that it is
# unlikely that lists and zsets just get more data without ever causing
# blocking.
-proc bg_block_op {host port db ops} {
- set r [redis $host $port]
+proc bg_block_op {host port db ops tls} {
+ set r [redis $host $port 0 $tls]
$r select $db
for {set j 0} {$j < $ops} {incr j} {
@@ -49,4 +51,4 @@ proc bg_block_op {host port db ops} {
}
}
-bg_block_op [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3]
+bg_block_op [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4]
diff --git a/tests/helpers/bg_complex_data.tcl b/tests/helpers/bg_complex_data.tcl
index dffd7c668..e888748a7 100644
--- a/tests/helpers/bg_complex_data.tcl
+++ b/tests/helpers/bg_complex_data.tcl
@@ -1,10 +1,12 @@
source tests/support/redis.tcl
source tests/support/util.tcl
-proc bg_complex_data {host port db ops} {
- set r [redis $host $port]
+set ::tlsdir "tests/tls"
+
+proc bg_complex_data {host port db ops tls} {
+ set r [redis $host $port 0 $tls]
$r select $db
createComplexDataset $r $ops
}
-bg_complex_data [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3]
+bg_complex_data [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4]
diff --git a/tests/helpers/gen_write_load.tcl b/tests/helpers/gen_write_load.tcl
index 6d1a34516..fd6aad40c 100644
--- a/tests/helpers/gen_write_load.tcl
+++ b/tests/helpers/gen_write_load.tcl
@@ -1,8 +1,10 @@
source tests/support/redis.tcl
-proc gen_write_load {host port seconds} {
+set ::tlsdir "tests/tls"
+
+proc gen_write_load {host port seconds tls} {
set start_time [clock seconds]
- set r [redis $host $port 1]
+ # Keep the deferring client (third argument 1) used before the tls
+ # argument was added, so writes are sent without waiting for replies.
+ set r [redis $host $port 1 $tls]
$r select 9
while 1 {
$r set [expr rand()] [expr rand()]
@@ -12,4 +14,4 @@ proc gen_write_load {host port seconds} {
}
}
-gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2]
+gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3]
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 357b34818..0a0cbab12 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -17,6 +17,7 @@ source ../support/test.tcl
set ::verbose 0
set ::valgrind 0
+set ::tls 0
set ::pause_on_error 0
set ::simulate_error 0
set ::failed 0
@@ -69,7 +70,19 @@ proc spawn_instance {type base_port count {conf {}}} {
# Write the instance config file.
set cfgfile [file join $dirname $type.conf]
set cfg [open $cfgfile w]
- puts $cfg "port $port"
+ if {$::tls} {
+ puts $cfg "tls-port $port"
+ puts $cfg "tls-replication yes"
+ puts $cfg "tls-cluster yes"
+ puts $cfg "port 0"
+ puts $cfg [format "tls-cert-file %s/../../tls/redis.crt" [pwd]]
+ puts $cfg [format "tls-key-file %s/../../tls/redis.key" [pwd]]
+ puts $cfg [format "tls-dh-params-file %s/../../tls/redis.dh" [pwd]]
+ puts $cfg [format "tls-ca-cert-file %s/../../tls/ca.crt" [pwd]]
+ puts $cfg "loglevel debug"
+ } else {
+ puts $cfg "port $port"
+ }
puts $cfg "dir ./$dirname"
puts $cfg "logfile log.txt"
# Add additional config files
@@ -88,7 +101,7 @@ proc spawn_instance {type base_port count {conf {}}} {
}
# Push the instance into the right list
- set link [redis 127.0.0.1 $port]
+ set link [redis 127.0.0.1 $port 0 $::tls]
$link reconnect 1
lappend ::${type}_instances [list \
pid $pid \
@@ -148,6 +161,13 @@ proc parse_options {} {
set ::simulate_error 1
} elseif {$opt eq {--valgrind}} {
set ::valgrind 1
+ } elseif {$opt eq {--tls}} {
+ package require tls 1.6
+ ::tls::init \
+ -cafile "$::tlsdir/ca.crt" \
+ -certfile "$::tlsdir/redis.crt" \
+ -keyfile "$::tlsdir/redis.key"
+ set ::tls 1
} elseif {$opt eq "--help"} {
puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
puts "\nOptions:"
@@ -492,7 +512,7 @@ proc restart_instance {type id} {
}
# Connect with it with a fresh link
- set link [redis 127.0.0.1 $port]
+ set link [redis 127.0.0.1 $port 0 $::tls]
$link reconnect 1
set_instance_attrib $type $id link $link
diff --git a/tests/integration/aof-race.tcl b/tests/integration/aof-race.tcl
index fb8d71083..2991e7962 100644
--- a/tests/integration/aof-race.tcl
+++ b/tests/integration/aof-race.tcl
@@ -13,8 +13,9 @@ tags {"aof"} {
# cleaned after a child responsible for an AOF rewrite exited. This buffer
# was subsequently appended to the new AOF, resulting in duplicate commands.
start_server_aof [list dir $server_path] {
- set client [redis [srv host] [srv port]]
- set bench [open "|src/redis-benchmark -q -p [srv port] -c 20 -n 20000 incr foo" "r+"]
+ set client [redis [srv host] [srv port] 0 $::tls]
+ set bench [open "|src/redis-benchmark -q -s [srv unixsocket] -c 20 -n 20000 incr foo" "r+"]
+
after 100
# Benchmark should be running by now: start background rewrite
@@ -29,7 +30,7 @@ tags {"aof"} {
# Restart server to replay AOF
start_server_aof [list dir $server_path] {
- set client [redis [srv host] [srv port]]
+ set client [redis [srv host] [srv port] 0 $::tls]
assert_equal 20000 [$client get foo]
}
}
diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl
index e397faeeb..2734de7f1 100644
--- a/tests/integration/aof.tcl
+++ b/tests/integration/aof.tcl
@@ -52,7 +52,7 @@ tags {"aof"} {
assert_equal 1 [is_alive $srv]
}
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
test "Truncated AOF loaded: we expect foo to be equal to 5" {
assert {[$client get foo] eq "5"}
@@ -69,7 +69,7 @@ tags {"aof"} {
assert_equal 1 [is_alive $srv]
}
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
test "Truncated AOF loaded: we expect foo to be equal to 6 now" {
assert {[$client get foo] eq "6"}
@@ -170,7 +170,7 @@ tags {"aof"} {
}
test "Fixed AOF: Keyspace should contain values that were parseable" {
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
wait_for_condition 50 100 {
[catch {$client ping} e] == 0
} else {
@@ -194,7 +194,7 @@ tags {"aof"} {
}
test "AOF+SPOP: Set should have 1 member" {
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
wait_for_condition 50 100 {
[catch {$client ping} e] == 0
} else {
@@ -218,7 +218,7 @@ tags {"aof"} {
}
test "AOF+SPOP: Set should have 1 member" {
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
wait_for_condition 50 100 {
[catch {$client ping} e] == 0
} else {
@@ -241,7 +241,7 @@ tags {"aof"} {
}
test "AOF+EXPIRE: List should be empty" {
- set client [redis [dict get $srv host] [dict get $srv port]]
+ set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls]
wait_for_condition 50 100 {
[catch {$client ping} e] == 0
} else {
@@ -257,4 +257,35 @@ tags {"aof"} {
r expire x -1
}
}
+
+ # Regression test for the write barrier used with appendfsync always:
+ # a reply must not reach the client before the AOF has been written.
+ start_server {overrides {appendonly {yes} appendfilename {appendonly.aof} appendfsync always}} {
+ test {AOF fsync always barrier issue} {
+ set rd [redis_deferring_client]
+ # Set a sleep when aof is flushed, so that we have a chance to look
+ # at the aof size and detect if the response of an incr command
+ # arrives before the data was written (and hopefully fsynced)
+ # We create a big reply, which will hopefully not have room in the
+ # socket buffers, and will install a write handler, then we sleep
+ # a bit and issue the incr command, hoping that the last portion of
+ # the output buffer write, and the processing of the incr will happen
+ # in the same event loop cycle.
+ # Since the socket buffers and timing are unpredictable, we fuzz this
+ # test with slightly different sizes and sleeps a few times.
+ for {set i 0} {$i < 10} {incr i} {
+ r debug aof-flush-sleep 0
+ r del x
+ r setrange x [expr {int(rand()*5000000)+10000000}] x
+ r debug aof-flush-sleep 500000
+ set aof [file join [lindex [r config get dir] 1] appendonly.aof]
+ set size1 [file size $aof]
+ $rd get x
+ after [expr {int(rand()*30)}]
+ $rd incr new_value
+ $rd read
+ $rd read
+ # Both replies have been received: the AOF size must have changed
+ # by now, otherwise the incr reply was sent before the write.
+ set size2 [file size $aof]
+ assert {$size1 != $size2}
+ }
+ }
+ }
}
diff --git a/tests/integration/block-repl.tcl b/tests/integration/block-repl.tcl
index c111b805b..07eceb228 100644
--- a/tests/integration/block-repl.tcl
+++ b/tests/integration/block-repl.tcl
@@ -2,9 +2,9 @@
# Unlike stream operations such operations are "pop" style, so they consume
# the list or sorted set, and must be replicated correctly.
-proc start_bg_block_op {host port db ops} {
+proc start_bg_block_op {host port db ops tls} {
set tclsh [info nameofexecutable]
- exec $tclsh tests/helpers/bg_block_op.tcl $host $port $db $ops &
+ exec $tclsh tests/helpers/bg_block_op.tcl $host $port $db $ops $tls &
}
proc stop_bg_block_op {handle} {
@@ -18,9 +18,9 @@ start_server {tags {"repl"}} {
set master_port [srv -1 port]
set slave [srv 0 client]
- set load_handle0 [start_bg_block_op $master_host $master_port 9 100000]
- set load_handle1 [start_bg_block_op $master_host $master_port 9 100000]
- set load_handle2 [start_bg_block_op $master_host $master_port 9 100000]
+ set load_handle0 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
+ set load_handle1 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
+ set load_handle2 [start_bg_block_op $master_host $master_port 9 100000 $::tls]
test {First server should have role slave after SLAVEOF} {
$slave slaveof $master_host $master_port
diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl
index 3d408368e..b5ad021e2 100644
--- a/tests/integration/psync2-reg.tcl
+++ b/tests/integration/psync2-reg.tcl
@@ -18,6 +18,7 @@ start_server {} {
set R($j) [srv [expr 0-$j] client]
set R_host($j) [srv [expr 0-$j] host]
set R_port($j) [srv [expr 0-$j] port]
+ set R_unixsocket($j) [srv [expr 0-$j] unixsocket]
if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
}
@@ -36,7 +37,7 @@ start_server {} {
}
set cycle_start_time [clock milliseconds]
- set bench_pid [exec src/redis-benchmark -p $R_port(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
+ set bench_pid [exec src/redis-benchmark -s $R_unixsocket(0) -n 10000000 -r 1000 incr __rand_int__ > /dev/null &]
while 1 {
set elapsed [expr {[clock milliseconds]-$cycle_start_time}]
if {$elapsed > $duration*1000} break
diff --git a/tests/integration/redis-cli.tcl b/tests/integration/redis-cli.tcl
index 40e4222e3..5d1635950 100644
--- a/tests/integration/redis-cli.tcl
+++ b/tests/integration/redis-cli.tcl
@@ -1,7 +1,10 @@
+source tests/support/cli.tcl
+
start_server {tags {"cli"}} {
proc open_cli {} {
set ::env(TERM) dumb
- set fd [open [format "|src/redis-cli -p %d -n 9" [srv port]] "r+"]
+ set cmdline [rediscli [srv port] "-n 9"]
+ set fd [open "|$cmdline" "r+"]
fconfigure $fd -buffering none
fconfigure $fd -blocking false
fconfigure $fd -translation binary
@@ -54,8 +57,8 @@ start_server {tags {"cli"}} {
}
proc _run_cli {opts args} {
- set cmd [format "src/redis-cli -p %d -n 9 $args" [srv port]]
- foreach {key value} $opts {
+ set cmd [rediscli [srv port] [list -n 9 {*}$args]]
+ # Options such as "pipe" live in $opts; $args holds the redis-cli arguments.
+ foreach {key value} $opts {
if {$key eq "pipe"} {
set cmd "sh -c \"$value | $cmd\""
}
diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl
index 1c18582c5..4bd1f47f7 100644
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@@ -466,3 +466,167 @@ test {diskless loading short read} {
}
}
+# get current stime and utime metrics for a thread (since its creation)
+proc get_cpu_metrics { statfile } {
+    # Read the start of the stat file; any failure is re-raised as an assertion error.
+    if {[catch {
+        set chan [open $statfile r]
+        set raw [read $chan 1024]
+        ::close $chan
+        set fields [split $raw]
+        # jiffies spent scheduled: list index 13 is taken as utime, index 14 as stime
+        set utime [lindex $fields 13]
+        set stime [lindex $fields 14]
+    } err]} {
+        error "assertion:can't parse /proc: $err"
+    }
+    # Returns {sample-time-in-ms utime stime}; the timestamp is taken after the read.
+    return [list [clock milliseconds] $utime $stime]
+}
+
+# compute %utime and %stime of a thread between two measurements
+proc compute_cpu_usage {start end} {
+    lassign $start t0 u0 s0
+    lassign $end t1 u1 s1
+    # elapsed wall-clock ms converted to jiffies using the CLK_TCK value from getconf
+    set elapsed [expr {($t1 - $t0) * double([exec getconf CLK_TCK]) / 1000}]
+    # utime and stime deltas as a percentage of the elapsed jiffies
+    set user_pct [expr {(($u1 - $u0) / $elapsed) * 100}]
+    set sys_pct [expr {(($s1 - $s0) / $elapsed) * 100}]
+    return [list $user_pct $sys_pct]
+}
+
+
+# test diskless rdb pipe with multiple replicas, which may drop half way
+start_server {tags {"repl"}} {
+ set master [srv 0 client]
+ $master config set repl-diskless-sync yes
+ $master config set repl-diskless-sync-delay 1
+ set master_host [srv 0 host]
+ set master_port [srv 0 port]
+ set master_pid [srv 0 pid]
+ # put enough data in the db that the rdb file will be bigger than the socket buffers
+ # and since we'll have key-load-delay of 100, 10000 keys will take at least 1 second
+ # we also need the replica to process requests during transfer (which it does only once in 2mb)
+ $master debug populate 10000 test 10000
+ $master config set rdbcompression no
+ # If running on Linux, we also measure utime/stime to detect possible I/O handling issues (catch stores uname's output in $os; its own result is only a status code)
+ if {[catch {exec uname} os]} {set os unknown}
+ set measure_time [expr {$os eq "Linux" ? 1 : 0}]
+ foreach all_drop {no slow fast all} {
+ test "diskless $all_drop replicas drop during rdb pipe" {
+ set replicas {}
+ set replicas_alive {}
+ # start one replica that will read the rdb fast, and one that will be slow
+ start_server {} {
+ lappend replicas [srv 0 client]
+ lappend replicas_alive [srv 0 client]
+ start_server {} {
+ lappend replicas [srv 0 client]
+ lappend replicas_alive [srv 0 client]
+
+ # start replication
+ # it's enough for just one replica to be slow, and have its write handler enabled
+ # so that the whole rdb generation process is bound to that
+ [lindex $replicas 0] config set repl-diskless-load swapdb
+ [lindex $replicas 0] config set key-load-delay 100
+ [lindex $replicas 0] replicaof $master_host $master_port
+ [lindex $replicas 1] replicaof $master_host $master_port
+
+ # wait for the replicas to start reading the rdb
+ # using the log file since the replica only responds to INFO once in 2mb
+ wait_for_log_message -1 "*Loading DB in memory*" 8 800 10
+
+ if {$measure_time} {
+ set master_statfile "/proc/$master_pid/stat"
+ set master_start_metrics [get_cpu_metrics $master_statfile]
+ set start_time [clock seconds]
+ }
+
+ # wait a while so that the pipe socket writer will be
+ # blocked on write (since replica 0 is slow to read from the socket)
+ after 500
+
+ # add some command to be present in the command stream after the rdb.
+ $master incr $all_drop
+
+ # disconnect replicas depending on the current test
+ if {$all_drop == "all" || $all_drop == "fast"} {
+ exec kill [srv 0 pid]
+ set replicas_alive [lreplace $replicas_alive 1 1]
+ }
+ if {$all_drop == "all" || $all_drop == "slow"} {
+ exec kill [srv -1 pid]
+ set replicas_alive [lreplace $replicas_alive 0 0]
+ }
+
+ # wait for rdb child to exit
+ wait_for_condition 500 100 {
+ [s -2 rdb_bgsave_in_progress] == 0
+ } else {
+ fail "rdb child didn't terminate"
+ }
+
+ # make sure we got what we were aiming for, by looking for the message in the log file
+ if {$all_drop == "all"} {
+ wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" 12 1 1
+ }
+ if {$all_drop == "no"} {
+ wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" 12 1 1
+ }
+ if {$all_drop == "slow" || $all_drop == "fast"} {
+ wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" 12 1 1
+ }
+
+ # make sure we don't have a busy loop going through epoll_wait
+ if {$measure_time} {
+ set master_end_metrics [get_cpu_metrics $master_statfile]
+ set time_elapsed [expr {[clock seconds]-$start_time}]
+ set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
+ set master_utime [lindex $master_cpu 0]
+ set master_stime [lindex $master_cpu 1]
+ if {$::verbose} {
+ puts "elapsed: $time_elapsed"
+ puts "master utime: $master_utime"
+ puts "master stime: $master_stime"
+ }
+ if {$all_drop == "all" || $all_drop == "slow"} {
+ assert {$master_utime < 70}
+ assert {$master_stime < 70}
+ }
+ if {$all_drop == "no" || $all_drop == "fast"} {
+ assert {$master_utime < 15}
+ assert {$master_stime < 15}
+ }
+ }
+
+ # verify the data integrity
+ foreach replica $replicas_alive {
+ # Wait that replicas acknowledge they are online so
+ # we are sure that DBSIZE and DEBUG DIGEST will not
+ # fail because of timing issues.
+ wait_for_condition 50 100 {
+ [lindex [$replica role] 3] eq {connected}
+ } else {
+ fail "replicas still not connected after some time"
+ }
+
+ # Make sure that replicas and master have same
+ # number of keys
+ wait_for_condition 50 100 {
+ [$master dbsize] == [$replica dbsize]
+ } else {
+ fail "Different number of keys between master and replicas after too long time."
+ }
+
+ # Check digests
+ set digest [$master debug digest]
+ set digest0 [$replica debug digest]
+ assert {$digest ne 0000000000000000000000000000000000000000}
+ assert {$digest eq $digest0}
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/tests/sentinel/run.tcl b/tests/sentinel/run.tcl
index 9a2fcfb49..996af906a 100644
--- a/tests/sentinel/run.tcl
+++ b/tests/sentinel/run.tcl
@@ -6,6 +6,7 @@ cd tests/sentinel
source ../instances.tcl
set ::instances_count 5 ; # How many instances we use at max.
+set ::tlsdir "../../tls"
proc main {} {
parse_options
diff --git a/tests/sentinel/tests/07-down-conditions.tcl b/tests/sentinel/tests/07-down-conditions.tcl
index fb2993b6f..a12ea3151 100644
--- a/tests/sentinel/tests/07-down-conditions.tcl
+++ b/tests/sentinel/tests/07-down-conditions.tcl
@@ -1,6 +1,7 @@
# Test conditions where an instance is considered to be down
source "../tests/includes/init-tests.tcl"
+source "../../../tests/support/cli.tcl"
proc ensure_master_up {} {
wait_for_condition 1000 50 {
@@ -28,7 +29,7 @@ test "Crash the majority of Sentinels to prevent failovers for this unit" {
test "SDOWN is triggered by non-responding but not crashed instance" {
lassign [S 4 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] host port
ensure_master_up
- exec ../../../src/redis-cli -h $host -p $port debug sleep 10 > /dev/null &
+ exec ../../../src/redis-cli -h $host -p $port {*}[rediscli_tls_config "../../../tests"] debug sleep 10 > /dev/null &
ensure_master_down
ensure_master_up
}
diff --git a/tests/support/cli.tcl b/tests/support/cli.tcl
new file mode 100644
index 000000000..37c902a50
--- /dev/null
+++ b/tests/support/cli.tcl
@@ -0,0 +1,19 @@
+proc rediscli_tls_config {testsdir} {
+    # Without TLS the redis-cli command line needs no extra arguments.
+    if {!$::tls} {
+        return {}
+    }
+    # Certificate, key and CA files are looked up under <testsdir>/tls.
+    set tlsdir [file join $testsdir tls]
+    return [list --tls \
+        --cert [file join $tlsdir redis.crt] \
+        --key [file join $tlsdir redis.key] \
+        --cacert [file join $tlsdir ca.crt]]
+}
+
+proc rediscli {port {opts {}}} {
+    # Command list: base redis-cli invocation, then TLS flags (if any), then caller options.
+    return [list src/redis-cli -p $port \
+        {*}[rediscli_tls_config "tests"] \
+        {*}$opts]
+}
diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl
index 1576053b4..74587e1f7 100644
--- a/tests/support/cluster.tcl
+++ b/tests/support/cluster.tcl
@@ -62,7 +62,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
lassign [split $ip_port :] start_host start_port
if {[catch {
set r {}
- set r [redis $start_host $start_port]
+ set r [redis $start_host $start_port 0 $::tls]
set nodes_descr [$r cluster nodes]
$r close
} e]} {
@@ -107,7 +107,7 @@ proc ::redis_cluster::__method__refresh_nodes_map {id} {
# Connect to the node
set link {}
- catch {set link [redis $host $port]}
+ catch {set link [redis $host $port 0 $::tls]}
# Build this node description as an hash.
set node [dict create \
diff --git a/tests/support/redis.tcl b/tests/support/redis.tcl
index cd8ae3a34..a90ac7f29 100644
--- a/tests/support/redis.tcl
+++ b/tests/support/redis.tcl
@@ -39,8 +39,18 @@ array set ::redis::callback {}
array set ::redis::state {} ;# State in non-blocking reply reading
array set ::redis::statestack {} ;# Stack of states, for nested mbulks
-proc redis {{server 127.0.0.1} {port 6379} {defer 0}} {
- set fd [socket $server $port]
+proc redis {{server 127.0.0.1} {port 6379} {defer 0} {tls 0} {tlsoptions {}}} {
+ if {$tls} {
+ package require tls
+ ::tls::init \
+ -cafile "$::tlsdir/ca.crt" \
+ -certfile "$::tlsdir/redis.crt" \
+ -keyfile "$::tlsdir/redis.key" \
+ {*}$tlsoptions
+ set fd [::tls::socket $server $port]
+ } else {
+ set fd [socket $server $port]
+ }
fconfigure $fd -translation binary
set id [incr ::redis::id]
set ::redis::fd($id) $fd
@@ -48,6 +58,7 @@ proc redis {{server 127.0.0.1} {port 6379} {defer 0}} {
set ::redis::blocking($id) 1
set ::redis::deferred($id) $defer
set ::redis::reconnect($id) 0
+ set ::redis::tls $tls
::redis::redis_reset_state $id
interp alias {} ::redis::redisHandle$id {} ::redis::__dispatch__ $id
}
@@ -72,7 +83,11 @@ proc ::redis::__dispatch__raw__ {id method argv} {
# Reconnect the link if needed.
if {$fd eq {}} {
lassign $::redis::addr($id) host port
- set ::redis::fd($id) [socket $host $port]
+ if {$::redis::tls} {
+ set ::redis::fd($id) [::tls::socket $host $port]
+ } else {
+ set ::redis::fd($id) [socket $host $port]
+ }
fconfigure $::redis::fd($id) -translation binary
set fd $::redis::fd($id)
}
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index 0edb25d8a..b20f1ad36 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -92,7 +92,11 @@ proc is_alive config {
proc ping_server {host port} {
set retval 0
if {[catch {
- set fd [socket $host $port]
+ if {$::tls} {
+ set fd [::tls::socket $host $port]
+ } else {
+ set fd [socket $host $port]
+ }
fconfigure $fd -translation binary
puts $fd "PING\r\n"
flush $fd
@@ -136,7 +140,6 @@ proc tags {tags code} {
uplevel 1 $code
set ::tags [lrange $::tags 0 end-[llength $tags]]
}
-
proc start_server {options {code undefined}} {
# If we are running against an external server, we just push the
# host/port pair in the stack the first time
@@ -145,7 +148,7 @@ proc start_server {options {code undefined}} {
set srv {}
dict set srv "host" $::host
dict set srv "port" $::port
- set client [redis $::host $::port]
+ set client [redis $::host $::port 0 $::tls]
dict set srv "client" $client
$client select 9
@@ -178,6 +181,13 @@ proc start_server {options {code undefined}} {
set data [split [exec cat "tests/assets/$baseconfig"] "\n"]
set config {}
+ if {$::tls} {
+ dict set config "tls-cert-file" [format "%s/tests/tls/redis.crt" [pwd]]
+ dict set config "tls-key-file" [format "%s/tests/tls/redis.key" [pwd]]
+ dict set config "tls-dh-params-file" [format "%s/tests/tls/redis.dh" [pwd]]
+ dict set config "tls-ca-cert-file" [format "%s/tests/tls/ca.crt" [pwd]]
+ dict set config "loglevel" "debug"
+ }
foreach line $data {
if {[string length $line] > 0 && [string index $line 0] ne "#"} {
set elements [split $line " "]
@@ -192,7 +202,17 @@ proc start_server {options {code undefined}} {
# start every server on a different port
set ::port [find_available_port [expr {$::port+1}]]
- dict set config port $::port
+ if {$::tls} {
+ dict set config "port" 0
+ dict set config "tls-port" $::port
+ dict set config "tls-cluster" "yes"
+ dict set config "tls-replication" "yes"
+ } else {
+ dict set config port $::port
+ }
+
+ set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]]
+ dict set config "unixsocket" $unixsocket
# apply overrides from global space and arguments
foreach {directive arguments} [concat $::global_overrides $overrides] {
@@ -254,10 +274,11 @@ proc start_server {options {code undefined}} {
}
# setup properties to be able to initialize a client object
+ set port_param [expr $::tls ? {"tls-port"} : {"port"}]
set host $::host
set port $::port
if {[dict exists $config bind]} { set host [dict get $config bind] }
- if {[dict exists $config port]} { set port [dict get $config port] }
+ if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
# setup config dict
dict set srv "config_file" $config_file
@@ -267,6 +288,7 @@ proc start_server {options {code undefined}} {
dict set srv "port" $port
dict set srv "stdout" $stdout
dict set srv "stderr" $stderr
+ dict set srv "unixsocket" $unixsocket
# if a block of code is supplied, we wait for the server to become
# available, create a client object and kill the server afterwards
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index c2e76afad..7ecf5b79c 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -395,7 +395,7 @@ proc colorstr {color str} {
# of seconds to the specified Redis instance.
proc start_write_load {host port seconds} {
set tclsh [info nameofexecutable]
- exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds &
+ exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls &
}
# Stop a process generating write load executed with start_write_load.
@@ -423,7 +423,7 @@ proc lshuffle {list} {
# of ops to the specified Redis instance.
proc start_bg_complex_data {host port db ops} {
set tclsh [info nameofexecutable]
- exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops &
+ exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops $::tls &
}
# Stop a process generating write load executed with start_bg_complex_data.
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index 1442067f5..cb7e4e328 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -63,6 +63,7 @@ set ::all_tests {
unit/lazyfree
unit/wait
unit/pendingquerybuf
+ unit/tls
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
@@ -71,6 +72,7 @@ set ::host 127.0.0.1
set ::port 21111
set ::traceleaks 0
set ::valgrind 0
+set ::tls 0
set ::stack_logging 0
set ::verbose 0
set ::quiet 0
@@ -92,6 +94,7 @@ set ::dont_clean 0
set ::wait_server 0
set ::stop_on_failure 0
set ::loop 0
+set ::tlsdir "tests/tls"
# Set to 1 when we are running in client mode. The Redis test uses a
# server-client model to run tests simultaneously. The server instance
@@ -146,7 +149,7 @@ proc reconnect {args} {
set host [dict get $srv "host"]
set port [dict get $srv "port"]
set config [dict get $srv "config"]
- set client [redis $host $port]
+ set client [redis $host $port 0 $::tls]
dict set srv "client" $client
# select the right db when we don't have to authenticate
@@ -166,7 +169,7 @@ proc redis_deferring_client {args} {
}
# create client that defers reading reply
- set client [redis [srv $level "host"] [srv $level "port"] 1]
+ set client [redis [srv $level "host"] [srv $level "port"] 1 $::tls]
# select the right db and read the response (OK)
$client select 9
@@ -204,7 +207,7 @@ proc test_server_main {} {
if {!$::quiet} {
puts "Starting test server at port $port"
}
- socket -server accept_test_clients -myaddr 127.0.0.1 $port
+ socket -server accept_test_clients -myaddr 127.0.0.1 $port
# Start the client instances
set ::clients_pids {}
@@ -450,6 +453,7 @@ proc print_help_screen {} {
"--stop Blocks once the first test fails."
"--loop Execute the specified set of tests forever."
"--wait-server Wait after server is started (so that you can attach a debugger)."
+ "--tls Run tests in TLS mode."
"--help Print this help screen."
} "\n"]
}
@@ -486,6 +490,13 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
}
} elseif {$opt eq {--quiet}} {
set ::quiet 1
+ } elseif {$opt eq {--tls}} {
+ package require tls 1.6
+ set ::tls 1
+ ::tls::init \
+ -cafile "$::tlsdir/ca.crt" \
+ -certfile "$::tlsdir/redis.crt" \
+ -keyfile "$::tlsdir/redis.key"
} elseif {$opt eq {--host}} {
set ::external 1
set ::host $arg
@@ -565,7 +576,11 @@ if {[llength $::single_tests] > 0} {
}
proc attach_to_replication_stream {} {
- set s [socket [srv 0 "host"] [srv 0 "port"]]
+ if {$::tls} {
+ set s [::tls::socket [srv 0 "host"] [srv 0 "port"]]
+ } else {
+ set s [socket [srv 0 "host"] [srv 0 "port"]]
+ }
fconfigure $s -translation binary
puts -nonewline $s "SYNC\r\n"
flush $s
diff --git a/tests/unit/limits.tcl b/tests/unit/limits.tcl
index b37ea9b0f..38ba76208 100644
--- a/tests/unit/limits.tcl
+++ b/tests/unit/limits.tcl
@@ -1,4 +1,9 @@
start_server {tags {"limits"} overrides {maxclients 10}} {
+ if {$::tls} {
+ set expected_code "*I/O error*"
+ } else {
+ set expected_code "*ERR max*reached*"
+ }
test {Check if maxclients works refusing connections} {
set c 0
catch {
@@ -12,5 +17,5 @@ start_server {tags {"limits"} overrides {maxclients 10}} {
} e
assert {$c > 8 && $c <= 10}
set e
- } {*ERR max*reached*}
+ } $expected_code
}
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index 965902456..7720c055a 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -166,7 +166,11 @@ start_server {tags {"other"}} {
tags {protocol} {
test {PIPELINING stresser (also a regression for the old epoll bug)} {
- set fd2 [socket $::host $::port]
+ if {$::tls} {
+ set fd2 [::tls::socket $::host $::port]
+ } else {
+ set fd2 [socket $::host $::port]
+ }
fconfigure $fd2 -encoding binary -translation binary
puts -nonewline $fd2 "SELECT 9\r\n"
flush $fd2
diff --git a/tests/unit/protocol.tcl b/tests/unit/protocol.tcl
index ac99c3abb..4dfdc6f59 100644
--- a/tests/unit/protocol.tcl
+++ b/tests/unit/protocol.tcl
@@ -72,7 +72,11 @@ start_server {tags {"protocol"}} {
foreach seq [list "\x00" "*\x00" "$\x00"] {
incr c
test "Protocol desync regression test #$c" {
- set s [socket [srv 0 host] [srv 0 port]]
+ if {$::tls} {
+ set s [::tls::socket [srv 0 host] [srv 0 port]]
+ } else {
+ set s [socket [srv 0 host] [srv 0 port]]
+ }
puts -nonewline $s $seq
set payload [string repeat A 1024]"\n"
set test_start [clock seconds]
diff --git a/tests/unit/tls.tcl b/tests/unit/tls.tcl
new file mode 100644
index 000000000..950f65557
--- /dev/null
+++ b/tests/unit/tls.tcl
@@ -0,0 +1,105 @@
+start_server {tags {"tls"}} {
+ if {$::tls} {
+ package require tls
+
+ test {TLS: Not accepting non-TLS connections on a TLS port} {
+ set s [redis [srv 0 host] [srv 0 port]]
+ catch {$s PING} e
+ set e
+ } {*I/O error*}
+
+ test {TLS: Verify tls-auth-clients behaves as expected} {
+ set s [redis [srv 0 host] [srv 0 port]]
+ ::tls::import [$s channel]
+ catch {$s PING} e
+ assert_match {*error*} $e
+
+ r CONFIG SET tls-auth-clients no
+
+ set s [redis [srv 0 host] [srv 0 port]]
+ ::tls::import [$s channel]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-auth-clients yes
+ }
+
+ test {TLS: Verify tls-protocols behaves as expected} {
+ r CONFIG SET tls-protocols TLSv1
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1 0}]
+ catch {$s PING} e
+ assert_match {*I/O error*} $e
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1 1}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-protocols TLSv1.1
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1.1 0}]
+ catch {$s PING} e
+ assert_match {*I/O error*} $e
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1.1 1}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-protocols TLSv1.2
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1.2 0}]
+ catch {$s PING} e
+ assert_match {*I/O error*} $e
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-tls1.2 1}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-protocols ""
+ }
+
+ test {TLS: Verify tls-ciphers behaves as expected} {
+ r CONFIG SET tls-protocols TLSv1.2
+ r CONFIG SET tls-ciphers "DEFAULT:-AES128-SHA256"
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-cipher "-ALL:AES128-SHA256"}]
+ catch {$s PING} e
+ assert_match {*I/O error*} $e
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-cipher "-ALL:AES256-SHA256"}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-ciphers "DEFAULT"
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-cipher "-ALL:AES128-SHA256"}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ r CONFIG SET tls-protocols ""
+ r CONFIG SET tls-ciphers "DEFAULT"
+ }
+
+ test {TLS: Verify tls-prefer-server-ciphers behaves as expected} {
+ r CONFIG SET tls-protocols TLSv1.2
+ r CONFIG SET tls-ciphers "AES128-SHA256:AES256-SHA256"
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-cipher "AES256-SHA256:AES128-SHA256"}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ assert_equal "AES256-SHA256" [dict get [::tls::status [$s channel]] cipher]
+
+ r CONFIG SET tls-prefer-server-ciphers yes
+
+ set s [redis [srv 0 host] [srv 0 port] 0 1 {-cipher "AES256-SHA256:AES128-SHA256"}]
+ catch {$s PING} e
+ assert_match {PONG} $e
+
+ assert_equal "AES128-SHA256" [dict get [::tls::status [$s channel]] cipher]
+
+ r CONFIG SET tls-protocols ""
+ r CONFIG SET tls-ciphers "DEFAULT"
+ }
+ }
+}
diff --git a/tests/unit/wait.tcl b/tests/unit/wait.tcl
index e2f5d2942..c9cfa6ed4 100644
--- a/tests/unit/wait.tcl
+++ b/tests/unit/wait.tcl
@@ -1,3 +1,5 @@
+source tests/support/cli.tcl
+
start_server {tags {"wait"}} {
start_server {} {
set slave [srv 0 client]
@@ -31,7 +33,8 @@ start_server {} {
}
test {WAIT should not acknowledge 1 additional copy if slave is blocked} {
- exec src/redis-cli -h $slave_host -p $slave_port debug sleep 5 > /dev/null 2> /dev/null &
+ set cmd [rediscli $slave_port "-h $slave_host debug sleep 5"]
+ exec {*}$cmd > /dev/null 2> /dev/null &
after 1000 ;# Give redis-cli the time to execute the command.
$master set foo 0
$master incr foo
diff --git a/utils/gen-test-certs.sh b/utils/gen-test-certs.sh
new file mode 100755
index 000000000..a46edc55a
--- /dev/null
+++ b/utils/gen-test-certs.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+mkdir -p tests/tls
+openssl genrsa -out tests/tls/ca.key 4096
+openssl req \
+ -x509 -new -nodes -sha256 \
+ -key tests/tls/ca.key \
+ -days 3650 \
+ -subj '/O=Redis Test/CN=Certificate Authority' \
+ -out tests/tls/ca.crt
+openssl genrsa -out tests/tls/redis.key 2048
+openssl req \
+ -new -sha256 \
+ -key tests/tls/redis.key \
+ -subj '/O=Redis Test/CN=Server' | \
+ openssl x509 \
+ -req -sha256 \
+ -CA tests/tls/ca.crt \
+ -CAkey tests/tls/ca.key \
+ -CAserial tests/tls/ca.txt \
+ -CAcreateserial \
+ -days 365 \
+ -out tests/tls/redis.crt
+openssl dhparam -out tests/tls/redis.dh 2048