summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/daily.yml4
-rw-r--r--src/acl.c12
-rw-r--r--src/cluster.c12
-rw-r--r--src/networking.c68
-rw-r--r--src/replication.c107
-rw-r--r--src/server.c4
-rw-r--r--src/server.h4
-rw-r--r--tests/instances.tcl3
-rw-r--r--tests/integration/psync2-pingoff.tcl145
-rw-r--r--tests/integration/psync2.tcl101
-rw-r--r--tests/modules/Makefile5
-rw-r--r--tests/support/server.tcl21
-rw-r--r--tests/support/util.tcl21
-rw-r--r--tests/test_helper.tcl31
-rw-r--r--tests/unit/other.tcl4
15 files changed, 268 insertions, 274 deletions
diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index c22d49895..acc4dd33a 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -47,7 +47,9 @@ jobs:
sudo apt-get install tcl8.5
./runtest --accurate --verbose
- name: module api test
- run: ./runtest-moduleapi --verbose
+ run: |
+ make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time
+ ./runtest-moduleapi --verbose
test-tls:
runs-on: ubuntu-latest
diff --git a/src/acl.c b/src/acl.c
index c3a724c33..6dd0f70ac 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -732,10 +732,11 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) {
* EEXIST: You are adding a key pattern after "*" was already added. This is
* almost surely an error on the user side.
* ENODEV: The password you are trying to remove from the user does not exist.
- * EBADMSG: The hash you are trying to add is not a valid hash.
+ * EBADMSG: The hash you are trying to add is not a valid hash.
*/
int ACLSetUser(user *u, const char *op, ssize_t oplen) {
if (oplen == -1) oplen = strlen(op);
+ if (oplen == 0) return C_OK; /* Empty string is a no-operation. */
if (!strcasecmp(op,"on")) {
u->flags |= USER_FLAG_ENABLED;
u->flags &= ~USER_FLAG_DISABLED;
@@ -1297,7 +1298,7 @@ sds ACLLoadFromFile(const char *filename) {
if (lines[i][0] == '\0') continue;
/* Split into arguments */
- argv = sdssplitargs(lines[i],&argc);
+ argv = sdssplitlen(lines[i],sdslen(lines[i])," ",1,&argc);
if (argv == NULL) {
errors = sdscatprintf(errors,
"%s:%d: unbalanced quotes in acl line. ",
@@ -1329,11 +1330,14 @@ sds ACLLoadFromFile(const char *filename) {
continue;
}
- /* Try to process the line using the fake user to validate iif
- * the rules are able to apply cleanly. */
+ /* Try to process the line using the fake user to validate if
+ * the rules are able to apply cleanly. At this stage we also
+ * trim trailing spaces, so that we don't have to handle that
+ * in ACLSetUser(). */
ACLSetUser(fakeuser,"reset",-1);
int j;
for (j = 2; j < argc; j++) {
+ argv[j] = sdstrim(argv[j],"\t\r\n");
if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) {
char *errmsg = ACLSetUserStringError();
errors = sdscatprintf(errors,
diff --git a/src/cluster.c b/src/cluster.c
index a2fab323a..24b14d1dc 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1463,7 +1463,10 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
}
} else {
/* If it's not in NOADDR state and we don't have it, we
- * start a handshake process against this IP/PORT pairs.
+ * add it to our trusted dict with exact nodeid and flag.
+ * Note that we cannot simply start a handshake against
+ * this IP/PORT pairs, since IP/PORT can be reused already,
+ * otherwise we risk joining another cluster.
*
* Note that we require that the sender of this gossip message
* is a well known node in our cluster, otherwise we risk
@@ -1472,7 +1475,12 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
!(flags & CLUSTER_NODE_NOADDR) &&
!clusterBlacklistExists(g->nodename))
{
- clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport));
+ clusterNode *node;
+ node = createClusterNode(g->nodename, flags);
+ memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+ node->port = ntohs(g->port);
+ node->cport = ntohs(g->cport);
+ clusterAddNode(node);
}
}
diff --git a/src/networking.c b/src/networking.c
index 364654642..8d3e057b7 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -396,31 +396,7 @@ void addReplyErrorLength(client *c, const char *s, size_t len) {
if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog &&
server.repl_backlog_histlen > 0)
{
- long long dumplen = 256;
- if (server.repl_backlog_histlen < dumplen)
- dumplen = server.repl_backlog_histlen;
-
- /* Identify the first byte to dump. */
- long long idx =
- (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) %
- server.repl_backlog_size;
-
- /* Scan the circular buffer to collect 'dumplen' bytes. */
- sds dump = sdsempty();
- while(dumplen) {
- long long thislen =
- ((server.repl_backlog_size - idx) < dumplen) ?
- (server.repl_backlog_size - idx) : dumplen;
-
- dump = sdscatrepr(dump,server.repl_backlog+idx,thislen);
- dumplen -= thislen;
- idx = 0;
- }
-
- /* Finally log such bytes: this is vital debugging info to
- * understand what happened. */
- serverLog(LL_WARNING,"Latest backlog is: '%s'", dump);
- sdsfree(dump);
+ showLatestBacklog();
}
server.stat_unexpected_error_replies++;
}
@@ -1037,25 +1013,14 @@ static void freeClientArgv(client *c) {
/* Close all the slaves connections. This is useful in chained replication
* when we resync with our own master and want to force all our slaves to
- * resync with us as well.
- *
- * If 'async' is non-zero we free the clients asynchronously. This is needed
- * when we call this function from a context where in the chain of the
- * callers somebody is iterating the list of clients. For instance when
- * CLIENT KILL TYPE master is called, caching the master client may
- * adjust the meaningful offset of replication, and in turn call
- * discionectSlaves(). Since CLIENT KILL iterates the clients this is
- * not safe. */
-void disconnectSlaves(int async) {
+ * resync with us as well. */
+void disconnectSlaves(void) {
listIter li;
listNode *ln;
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
listNode *ln = listFirst(server.slaves);
- if (async)
- freeClientAsync((client*)ln->value);
- else
- freeClient((client*)ln->value);
+ freeClient((client*)ln->value);
}
}
@@ -1553,6 +1518,19 @@ int processInlineBuffer(client *c) {
if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE)
c->repl_ack_time = server.unixtime;
+ /* Masters should never send us inline protocol to run actual
+ * commands. If this happens, it is likely due to a bug in Redis where
+ * we got some desynchronization in the protocol, for example
+ * beause of a PSYNC gone bad.
+ *
+ * However the is an exception: masters may send us just a newline
+ * to keep the connection active. */
+ if (querylen != 0 && c->flags & CLIENT_MASTER) {
+ serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
+ setProtocolError("Master using the inline protocol. Desync?",c);
+ return C_ERR;
+ }
+
/* Move querybuffer position to the next query in the buffer. */
c->qb_pos += querylen+linefeed_chars;
@@ -1576,7 +1554,7 @@ int processInlineBuffer(client *c) {
* CLIENT_PROTOCOL_ERROR. */
#define PROTO_DUMP_LEN 128
static void setProtocolError(const char *errstr, client *c) {
- if (server.verbosity <= LL_VERBOSE) {
+ if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) {
sds client = catClientInfoString(sdsempty(),c);
/* Sample some protocol to given an idea about what was inside. */
@@ -1595,7 +1573,9 @@ static void setProtocolError(const char *errstr, client *c) {
}
/* Log all the client and protocol info. */
- serverLog(LL_VERBOSE,
+ int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING :
+ LL_VERBOSE;
+ serverLog(loglevel,
"Protocol error (%s) from client: %s. %s", errstr, client, buf);
sdsfree(client);
}
@@ -1754,7 +1734,6 @@ int processMultibulkBuffer(client *c) {
* 2. In the case of master clients, the replication offset is updated.
* 3. Propagate commands we got from our master to replicas down the line. */
void commandProcessed(client *c) {
- int cmd_is_ping = c->cmd && c->cmd->proc == pingCommand;
long long prev_offset = c->reploff;
if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
/* Update the applied replication offset of our master. */
@@ -1779,16 +1758,11 @@ void commandProcessed(client *c) {
* sub-replicas and to the replication backlog. */
if (c->flags & CLIENT_MASTER) {
long long applied = c->reploff - prev_offset;
- long long prev_master_repl_meaningful_offset = server.master_repl_meaningful_offset;
if (applied) {
replicationFeedSlavesFromMasterStream(server.slaves,
c->pending_querybuf, applied);
sdsrange(c->pending_querybuf,applied,-1);
}
- /* The server.master_repl_meaningful_offset variable represents
- * the offset of the replication stream without the pending PINGs. */
- if (cmd_is_ping)
- server.master_repl_meaningful_offset = prev_master_repl_meaningful_offset;
}
}
diff --git a/src/replication.c b/src/replication.c
index 2b21b02d8..063a8705e 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -39,7 +39,6 @@
#include <sys/socket.h>
#include <sys/stat.h>
-long long adjustMeaningfulReplOffset(int *adjusted);
void replicationDiscardCachedMaster(void);
void replicationResurrectCachedMaster(connection *conn);
void replicationSendAck(void);
@@ -163,7 +162,6 @@ void feedReplicationBacklog(void *ptr, size_t len) {
unsigned char *p = ptr;
server.master_repl_offset += len;
- server.master_repl_meaningful_offset = server.master_repl_offset;
/* This is a circular buffer, so write as much data we can at every
* iteration and rewind the "idx" index if we reach the limit. */
@@ -309,6 +307,40 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
}
}
+/* This is a debugging function that gets called when we detect something
+ * wrong with the replication protocol: the goal is to peek into the
+ * replication backlog and show a few final bytes to make simpler to
+ * guess what kind of bug it could be. */
+void showLatestBacklog(void) {
+ if (server.repl_backlog == NULL) return;
+
+ long long dumplen = 256;
+ if (server.repl_backlog_histlen < dumplen)
+ dumplen = server.repl_backlog_histlen;
+
+ /* Identify the first byte to dump. */
+ long long idx =
+ (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) %
+ server.repl_backlog_size;
+
+ /* Scan the circular buffer to collect 'dumplen' bytes. */
+ sds dump = sdsempty();
+ while(dumplen) {
+ long long thislen =
+ ((server.repl_backlog_size - idx) < dumplen) ?
+ (server.repl_backlog_size - idx) : dumplen;
+
+ dump = sdscatrepr(dump,server.repl_backlog+idx,thislen);
+ dumplen -= thislen;
+ idx = 0;
+ }
+
+ /* Finally log such bytes: this is vital debugging info to
+ * understand what happened. */
+ serverLog(LL_WARNING,"Latest backlog is: '%s'", dump);
+ sdsfree(dump);
+}
+
/* This function is used in order to proxy what we receive from our master
* to our sub-slaves. */
#include <ctype.h>
@@ -1831,7 +1863,6 @@ void readSyncBulkPayload(connection *conn) {
* we are starting a new history. */
memcpy(server.replid,server.master->replid,sizeof(server.replid));
server.master_repl_offset = server.master->reploff;
- server.master_repl_meaningful_offset = server.master->reploff;
clearReplicationId2();
/* Let's create the replication backlog if needed. Slaves need to
@@ -2086,7 +2117,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
memcpy(server.cached_master->replid,new,sizeof(server.replid));
/* Disconnect all the sub-slaves: they need to be notified. */
- disconnectSlaves(0);
+ disconnectSlaves();
}
}
@@ -2359,7 +2390,7 @@ void syncWithMaster(connection *conn) {
* as well, if we have any sub-slaves. The master may transfer us an
* entirely different data set and we have no way to incrementally feed
* our slaves after that. */
- disconnectSlaves(0); /* Force our slaves to resync with us as well. */
+ disconnectSlaves(); /* Force our slaves to resync with us as well. */
freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
/* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
@@ -2506,7 +2537,7 @@ void replicationSetMaster(char *ip, int port) {
/* Force our slaves to resync with us as well. They may hopefully be able
* to partially resync with us, but we can notify the replid change. */
- disconnectSlaves(0);
+ disconnectSlaves();
cancelReplicationHandshake();
/* Before destroying our master state, create a cached master using
* our own parameters, to later PSYNC with the new master. */
@@ -2557,7 +2588,7 @@ void replicationUnsetMaster(void) {
* of the replication ID change (see shiftReplicationId() call). However
* the slaves will be able to partially resync with us, so it will be
* a very fast reconnection. */
- disconnectSlaves(0);
+ disconnectSlaves();
server.repl_state = REPL_STATE_NONE;
/* We need to make sure the new master will start the replication stream
@@ -2759,11 +2790,6 @@ void replicationCacheMaster(client *c) {
* pending outputs to the master. */
sdsclear(server.master->querybuf);
sdsclear(server.master->pending_querybuf);
-
- /* Adjust reploff and read_reploff to the last meaningful offset we
- * executed. This is the offset the replica will use for future PSYNC. */
- int offset_adjusted;
- server.master->reploff = adjustMeaningfulReplOffset(&offset_adjusted);
server.master->read_reploff = server.master->reploff;
if (c->flags & CLIENT_MULTI) discardTransaction(c);
listEmpty(c->reply);
@@ -2786,53 +2812,6 @@ void replicationCacheMaster(client *c) {
* so make sure to adjust the replication state. This function will
* also set server.master to NULL. */
replicationHandleMasterDisconnection();
-
- /* If we trimmed this replica backlog, we need to disconnect our chained
- * replicas (if any), otherwise they may have the PINGs we removed
- * from the stream and their offset would no longer match: upon
- * disconnection they will also trim the final PINGs and will be able
- * to incrementally sync without issues. */
- if (offset_adjusted) disconnectSlaves(1);
-}
-
-/* If the "meaningful" offset, that is the offset without the final PINGs
- * in the stream, is different than the last offset, use it instead:
- * often when the master is no longer reachable, replicas will never
- * receive the PINGs, however the master will end with an incremented
- * offset because of the PINGs and will not be able to incrementally
- * PSYNC with the new master.
- * This function trims the replication backlog when needed, and returns
- * the offset to be used for future partial sync.
- *
- * If the integer 'adjusted' was passed by reference, it is set to 1
- * if the function call actually modified the offset and the replication
- * backlog, otherwise it is set to 0. It can be NULL if the caller is
- * not interested in getting this info. */
-long long adjustMeaningfulReplOffset(int *adjusted) {
- if (server.master_repl_offset > server.master_repl_meaningful_offset) {
- long long delta = server.master_repl_offset -
- server.master_repl_meaningful_offset;
- serverLog(LL_NOTICE,
- "Using the meaningful offset %lld instead of %lld to exclude "
- "the final PINGs (%lld bytes difference)",
- server.master_repl_meaningful_offset,
- server.master_repl_offset,
- delta);
- server.master_repl_offset = server.master_repl_meaningful_offset;
- if (server.repl_backlog_histlen <= delta) {
- server.repl_backlog_histlen = 0;
- server.repl_backlog_idx = 0;
- } else {
- server.repl_backlog_histlen -= delta;
- server.repl_backlog_idx =
- (server.repl_backlog_idx + (server.repl_backlog_size - delta)) %
- server.repl_backlog_size;
- }
- if (adjusted) *adjusted = 1;
- } else {
- if (adjusted) *adjusted = 0;
- }
- return server.master_repl_offset;
}
/* This function is called when a master is turend into a slave, in order to
@@ -2854,7 +2833,7 @@ void replicationCacheMasterUsingMyself(void) {
* by replicationCreateMasterClient(). We'll later set the created
* master as server.cached_master, so the replica will use such
* offset for PSYNC. */
- server.master_initial_offset = adjustMeaningfulReplOffset(NULL);
+ server.master_initial_offset = server.master_repl_offset;
/* The master client we create can be set to any DBID, because
* the new master will start its replication stream with SELECT. */
@@ -3246,18 +3225,10 @@ void replicationCron(void) {
clientsArePaused();
if (!manual_failover_in_progress) {
- long long before_ping = server.master_repl_meaningful_offset;
ping_argv[0] = createStringObject("PING",4);
replicationFeedSlaves(server.slaves, server.slaveseldb,
ping_argv, 1);
decrRefCount(ping_argv[0]);
- /* The server.master_repl_meaningful_offset variable represents
- * the offset of the replication stream without the pending PINGs.
- * This is useful to set the right replication offset for PSYNC
- * when the master is turned into a replica. Otherwise pending
- * PINGs may not allow it to perform an incremental sync with the
- * new master. */
- server.master_repl_meaningful_offset = before_ping;
}
}
diff --git a/src/server.c b/src/server.c
index 5bc4666ee..b7a6a928f 100644
--- a/src/server.c
+++ b/src/server.c
@@ -2394,7 +2394,6 @@ void initServerConfig(void) {
server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
server.master_repl_offset = 0;
- server.master_repl_meaningful_offset = 0;
/* Replication partial resync backlog */
server.repl_backlog = NULL;
@@ -4471,7 +4470,6 @@ sds genRedisInfoString(const char *section) {
"master_replid:%s\r\n"
"master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
- "master_repl_meaningful_offset:%lld\r\n"
"second_repl_offset:%lld\r\n"
"repl_backlog_active:%d\r\n"
"repl_backlog_size:%lld\r\n"
@@ -4480,7 +4478,6 @@ sds genRedisInfoString(const char *section) {
server.replid,
server.replid2,
server.master_repl_offset,
- server.master_repl_meaningful_offset,
server.second_replid_offset,
server.repl_backlog != NULL,
server.repl_backlog_size,
@@ -4858,7 +4855,6 @@ void loadDataFromDisk(void) {
{
memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
server.master_repl_offset = rsi.repl_offset;
- server.master_repl_meaningful_offset = rsi.repl_offset;
/* If we are a slave, create a cached master from this
* information, in order to allow partial resynchronizations
* with masters. */
diff --git a/src/server.h b/src/server.h
index 2d17d69c8..a08585292 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1261,7 +1261,6 @@ struct redisServer {
char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */
char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
long long master_repl_offset; /* My current replication offset */
- long long master_repl_meaningful_offset; /* Offset minus latest PINGs. */
long long second_replid_offset; /* Accept offsets up to this for replid2. */
int slaveseldb; /* Last SELECTed DB in replication output */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
@@ -1660,7 +1659,7 @@ int getClientType(client *c);
int getClientTypeByName(char *name);
char *getClientTypeName(int class);
void flushSlavesOutputBuffers(void);
-void disconnectSlaves(int async);
+void disconnectSlaves(void);
int listenToPort(int port, int *fds, int *count);
void pauseClients(mstime_t duration);
int clientsArePaused(void);
@@ -1811,6 +1810,7 @@ void clearReplicationId2(void);
void chopReplicationBacklog(void);
void replicationCacheMasterUsingMyself(void);
void feedReplicationBacklog(void *ptr, size_t len);
+void showLatestBacklog(void);
void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 0a0cbab12..3a4fadca0 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -25,6 +25,7 @@ set ::sentinel_instances {}
set ::redis_instances {}
set ::sentinel_base_port 20000
set ::redis_base_port 30000
+set ::redis_port_count 1024
set ::pids {} ; # We kill everything at exit
set ::dirs {} ; # We remove all the temp dirs at exit
set ::run_matching {} ; # If non empty, only tests matching pattern are run.
@@ -57,7 +58,7 @@ proc exec_instance {type cfgfile} {
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
for {set j 0} {$j < $count} {incr j} {
- set port [find_available_port $base_port]
+ set port [find_available_port $base_port $::redis_port_count]
incr base_port
puts "Starting $type #$j at port $port"
diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl
index 2c2303141..5a9a46d16 100644
--- a/tests/integration/psync2-pingoff.tcl
+++ b/tests/integration/psync2-pingoff.tcl
@@ -1,6 +1,9 @@
-# Test the meaningful offset implementation to make sure masters
-# are able to PSYNC with replicas even if the replication stream
-# has pending PINGs at the end.
+# These tests were added together with the meaningful offset implementation
+# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that
+# servers are able to PSYNC with replicas even if the replication stream has
+# PINGs at the end which present in one sever and missing on another.
+# We keep these tests just because they reproduce edge cases in the replication
+# logic in hope they'll be able to spot some problem in the future.
start_server {tags {"psync2"}} {
start_server {} {
@@ -16,7 +19,7 @@ start_server {} {
}
# Setup replication
- test "PSYNC2 meaningful offset: setup" {
+ test "PSYNC2 pingoff: setup" {
$R(1) replicaof $R_host(0) $R_port(0)
$R(0) set foo bar
wait_for_condition 50 1000 {
@@ -27,7 +30,7 @@ start_server {} {
}
}
- test "PSYNC2 meaningful offset: write and wait replication" {
+ test "PSYNC2 pingoff: write and wait replication" {
$R(0) INCR counter
$R(0) INCR counter
$R(0) INCR counter
@@ -41,7 +44,7 @@ start_server {} {
# In this test we'll make sure the replica will get stuck, but with
# an active connection: this way the master will continue to send PINGs
# every second (we modified the PING period earlier)
- test "PSYNC2 meaningful offset: pause replica and promote it" {
+ test "PSYNC2 pingoff: pause replica and promote it" {
$R(1) MULTI
$R(1) DEBUG SLEEP 5
$R(1) SLAVEOF NO ONE
@@ -50,14 +53,22 @@ start_server {} {
}
test "Make the old master a replica of the new one and check conditions" {
- set sync_partial [status $R(1) sync_partial_ok]
- assert {$sync_partial == 0}
+ assert_equal [status $R(1) sync_full] 0
$R(0) REPLICAOF $R_host(1) $R_port(1)
wait_for_condition 50 1000 {
- [status $R(1) sync_partial_ok] == 1
+ [status $R(1) sync_full] == 1
} else {
- fail "The new master was not able to partial sync"
+ fail "The new master was not able to sync"
}
+
+ # make sure replication is still alive and kicking
+ $R(1) incr x
+ wait_for_condition 50 1000 {
+ [$R(0) get x] == 1
+ } else {
+ fail "replica didn't get incr"
+ }
+ assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
}
}}
@@ -65,6 +76,106 @@ start_server {} {
start_server {tags {"psync2"}} {
start_server {} {
start_server {} {
+start_server {} {
+start_server {} {
+ test {test various edge cases of repl topology changes with missing pings at the end} {
+ set master [srv -4 client]
+ set master_host [srv -4 host]
+ set master_port [srv -4 port]
+ set replica1 [srv -3 client]
+ set replica2 [srv -2 client]
+ set replica3 [srv -1 client]
+ set replica4 [srv -0 client]
+
+ $replica1 replicaof $master_host $master_port
+ $replica2 replicaof $master_host $master_port
+ $replica3 replicaof $master_host $master_port
+ $replica4 replicaof $master_host $master_port
+ wait_for_condition 50 1000 {
+ [status $master connected_slaves] == 4
+ } else {
+ fail "replicas didn't connect"
+ }
+
+ $master incr x
+ wait_for_condition 50 1000 {
+ [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
+ [$replica3 get x] == 1 && [$replica4 get x] == 1
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # disconnect replica1 and replica2
+ # and wait for the master to send a ping to replica3 and replica4
+ $replica1 replicaof no one
+ $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
+ $master config set repl-ping-replica-period 1
+ after 1500
+
+ # make everyone sync from the replica1 that didn't get the last ping from the old master
+ # replica4 will keep syncing from the old master which now syncs from replica1
+ # and replica2 will re-connect to the old master (which went back in time)
+ set new_master_host [srv -3 host]
+ set new_master_port [srv -3 port]
+ $replica3 replicaof $new_master_host $new_master_port
+ $master replicaof $new_master_host $new_master_port
+ $replica2 replicaof $master_host $master_port
+ wait_for_condition 50 1000 {
+ [status $replica2 master_link_status] == "up" &&
+ [status $replica3 master_link_status] == "up" &&
+ [status $replica4 master_link_status] == "up" &&
+ [status $master master_link_status] == "up"
+ } else {
+ fail "replicas didn't connect"
+ }
+
+ # make sure replication is still alive and kicking
+ $replica1 incr x
+ wait_for_condition 50 1000 {
+ [$replica2 get x] == 2 &&
+ [$replica3 get x] == 2 &&
+ [$replica4 get x] == 2 &&
+ [$master get x] == 2
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # make sure we have the right amount of full syncs
+ assert_equal [status $master sync_full] 6
+ assert_equal [status $replica1 sync_full] 2
+ assert_equal [status $replica2 sync_full] 0
+ assert_equal [status $replica3 sync_full] 0
+ assert_equal [status $replica4 sync_full] 0
+
+ # force psync
+ $master client kill type master
+ $replica2 client kill type master
+ $replica3 client kill type master
+ $replica4 client kill type master
+
+ # make sure replication is still alive and kicking
+ $replica1 incr x
+ wait_for_condition 50 1000 {
+ [$replica2 get x] == 3 &&
+ [$replica3 get x] == 3 &&
+ [$replica4 get x] == 3 &&
+ [$master get x] == 3
+ } else {
+ fail "replicas didn't get incr"
+ }
+
+ # make sure we have the right amount of full syncs
+ assert_equal [status $master sync_full] 6
+ assert_equal [status $replica1 sync_full] 2
+ assert_equal [status $replica2 sync_full] 0
+ assert_equal [status $replica3 sync_full] 0
+ assert_equal [status $replica4 sync_full] 0
+}
+}}}}}
+
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
for {set j 0} {$j < 3} {incr j} {
set R($j) [srv [expr 0-$j] client]
@@ -97,7 +208,7 @@ start_server {} {
[status $R(1) master_link_status] == "up" &&
[status $R(2) master_link_status] == "up" &&
[status $R(0) sync_partial_ok] == $sync_partial_master + 1 &&
- [status $R(1) sync_partial_ok] == $sync_partial_replica + 1
+ [status $R(1) sync_partial_ok] == $sync_partial_replica
} else {
fail "Disconnected replica failed to PSYNC with master"
}
@@ -106,7 +217,15 @@ start_server {} {
# offsets match with the master
assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
- assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset]
- assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset]
+
+ # make sure replication is still alive and kicking
+ $R(0) incr counter2
+ wait_for_condition 50 1000 {
+ [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2
+ } else {
+ fail "replicas didn't get incr"
+ }
+ assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
+ assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
}
}}}
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
index 29a880f99..3f636463a 100644
--- a/tests/integration/psync2.tcl
+++ b/tests/integration/psync2.tcl
@@ -242,7 +242,6 @@ start_server {} {
show_cluster_status
fail "Replicas and master offsets were unable to match *exactly*."
}
- $R($master_id) config set repl-ping-replica-period 10
# Limit anyway the maximum number of cycles. This is useful when the
# test is skipped via --only option of the test suite. In that case
@@ -370,103 +369,3 @@ start_server {} {
}
}}}}}
-
-start_server {tags {"psync2"}} {
-start_server {} {
-start_server {} {
-start_server {} {
-start_server {} {
- test {pings at the end of replication stream are ignored for psync} {
- set master [srv -4 client]
- set master_host [srv -4 host]
- set master_port [srv -4 port]
- set replica1 [srv -3 client]
- set replica2 [srv -2 client]
- set replica3 [srv -1 client]
- set replica4 [srv -0 client]
-
- $replica1 replicaof $master_host $master_port
- $replica2 replicaof $master_host $master_port
- $replica3 replicaof $master_host $master_port
- $replica4 replicaof $master_host $master_port
- wait_for_condition 50 1000 {
- [status $master connected_slaves] == 4
- } else {
- fail "replicas didn't connect"
- }
-
- $master incr x
- wait_for_condition 50 1000 {
- [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
- [$replica3 get x] == 1 && [$replica4 get x] == 1
- } else {
- fail "replicas didn't get incr"
- }
-
- # disconnect replica1 and replica2
- # and wait for the master to send a ping to replica3 and replica4
- $replica1 replicaof no one
- $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
- $master config set repl-ping-replica-period 1
- after 1500
-
- # make everyone sync from the replica1 that didn't get the last ping from the old master
- # replica4 will keep syncing from the old master which now syncs from replica1
- # and replica2 will re-connect to the old master (which went back in time)
- set new_master_host [srv -3 host]
- set new_master_port [srv -3 port]
- $replica3 replicaof $new_master_host $new_master_port
- $master replicaof $new_master_host $new_master_port
- $replica2 replicaof $master_host $master_port
- wait_for_condition 50 1000 {
- [status $replica2 master_link_status] == "up" &&
- [status $replica3 master_link_status] == "up" &&
- [status $replica4 master_link_status] == "up" &&
- [status $master master_link_status] == "up"
- } else {
- fail "replicas didn't connect"
- }
-
- # make sure replication is still alive and kicking
- $replica1 incr x
- wait_for_condition 50 1000 {
- [$replica2 get x] == 2 &&
- [$replica3 get x] == 2 &&
- [$replica4 get x] == 2 &&
- [$master get x] == 2
- } else {
- fail "replicas didn't get incr"
- }
-
- # make sure there are full syncs other than the initial ones
- assert_equal [status $master sync_full] 4
- assert_equal [status $replica1 sync_full] 0
- assert_equal [status $replica2 sync_full] 0
- assert_equal [status $replica3 sync_full] 0
- assert_equal [status $replica4 sync_full] 0
-
- # force psync
- $master client kill type master
- $replica2 client kill type master
- $replica3 client kill type master
- $replica4 client kill type master
-
- # make sure replication is still alive and kicking
- $replica1 incr x
- wait_for_condition 50 1000 {
- [$replica2 get x] == 3 &&
- [$replica3 get x] == 3 &&
- [$replica4 get x] == 3 &&
- [$master get x] == 3
- } else {
- fail "replicas didn't get incr"
- }
-
- # make sure there are full syncs other than the initial ones
- assert_equal [status $master sync_full] 4
- assert_equal [status $replica1 sync_full] 0
- assert_equal [status $replica2 sync_full] 0
- assert_equal [status $replica3 sync_full] 0
- assert_equal [status $replica4 sync_full] 0
-}
-}}}}}
diff --git a/tests/modules/Makefile b/tests/modules/Makefile
index 363231a87..39b8e6efa 100644
--- a/tests/modules/Makefile
+++ b/tests/modules/Makefile
@@ -28,11 +28,14 @@ TEST_MODULES = \
all: $(TEST_MODULES)
+32bit:
+ $(MAKE) CFLAGS="-m32" LDFLAGS="-melf_i386"
+
%.xo: %.c ../../src/redismodule.h
$(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@
%.so: %.xo
- $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+ $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LDFLAGS) $(LIBS) -lc
.PHONY: clean
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index d086366dc..146ebc72c 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -214,14 +214,14 @@ proc start_server {options {code undefined}} {
dict set config dir [tmpdir server]
# start every server on a different port
- set ::port [find_available_port [expr {$::port+1}]]
+ set port [find_available_port $::baseport $::portcount]
if {$::tls} {
dict set config "port" 0
- dict set config "tls-port" $::port
+ dict set config "tls-port" $port
dict set config "tls-cluster" "yes"
dict set config "tls-replication" "yes"
} else {
- dict set config port $::port
+ dict set config port $port
}
set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]]
@@ -243,10 +243,10 @@ proc start_server {options {code undefined}} {
set server_started 0
while {$server_started == 0} {
if {$::verbose} {
- puts -nonewline "=== ($tags) Starting server ${::host}:${::port} "
+ puts -nonewline "=== ($tags) Starting server ${::host}:${port} "
}
- send_data_packet $::test_server_fd "server-spawning" "port $::port"
+ send_data_packet $::test_server_fd "server-spawning" "port $port"
if {$::valgrind} {
set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
@@ -291,19 +291,19 @@ proc start_server {options {code undefined}} {
# for availability. Other test clients may grab the port before we
# are able to do it for example.
if {$port_busy} {
- puts "Port $::port was already busy, trying another port..."
- set ::port [find_available_port [expr {$::port+1}]]
+ puts "Port $port was already busy, trying another port..."
+ set port [find_available_port $::baseport $::portcount]
if {$::tls} {
- dict set config "tls-port" $::port
+ dict set config "tls-port" $port
} else {
- dict set config port $::port
+ dict set config port $port
}
create_server_config_file $config_file $config
continue; # Try again
}
if {$code ne "undefined"} {
- set serverisup [server_is_up $::host $::port $retrynum]
+ set serverisup [server_is_up $::host $port $retrynum]
} else {
set serverisup 1
}
@@ -324,7 +324,6 @@ proc start_server {options {code undefined}} {
# setup properties to be able to initialize a client object
set port_param [expr $::tls ? {"tls-port"} : {"port"}]
set host $::host
- set port $::port
if {[dict exists $config bind]} { set host [dict get $config bind] }
if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 7ecf5b79c..8bec95374 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -344,21 +344,26 @@ proc roundFloat f {
format "%.10g" $f
}
-proc find_available_port start {
- for {set j $start} {$j < $start+1024} {incr j} {
- if {[catch {set fd1 [socket 127.0.0.1 $j]}] &&
- [catch {set fd2 [socket 127.0.0.1 [expr $j+10000]]}]} {
- return $j
+set ::last_port_attempted 0
+proc find_available_port {start count} {
+ set port [expr $::last_port_attempted + 1]
+ for {set attempts 0} {$attempts < $count} {incr attempts} {
+ if {$port < $start || $port >= $start+$count} {
+ set port $start
+ }
+ if {[catch {set fd1 [socket 127.0.0.1 $port]}] &&
+ [catch {set fd2 [socket 127.0.0.1 [expr $port+10000]]}]} {
+ set ::last_port_attempted $port
+ return $port
} else {
catch {
close $fd1
close $fd2
}
}
+ incr port
}
- if {$j == $start+1024} {
- error "Can't find a non busy port in the $start-[expr {$start+1023}] range."
- }
+ error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range."
}
# Test if TERM looks like to support colors
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index de0a64728..fba54acb5 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -65,12 +65,15 @@ set ::all_tests {
unit/wait
unit/pendingquerybuf
unit/tls
+ unit/tracking
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0
set ::host 127.0.0.1
-set ::port 21111
+set ::port 6379; # port for external server
+set ::baseport 21111; # initial port for spawned redis servers
+set ::portcount 8000; # we don't wanna use more than 10000 to avoid collision with cluster bus ports
set ::traceleaks 0
set ::valgrind 0
set ::tls 0
@@ -228,26 +231,26 @@ proc test_server_main {} {
set tclsh [info nameofexecutable]
# Open a listening socket, trying different ports in order to find a
# non busy one.
- set port [find_available_port 11111]
+ set clientport [find_available_port 11111 32]
if {!$::quiet} {
- puts "Starting test server at port $port"
+ puts "Starting test server at port $clientport"
}
- socket -server accept_test_clients -myaddr 127.0.0.1 $port
+ socket -server accept_test_clients -myaddr 127.0.0.1 $clientport
# Start the client instances
set ::clients_pids {}
if {$::external} {
set p [exec $tclsh [info script] {*}$::argv \
- --client $port --port $::port &]
+ --client $clientport &]
lappend ::clients_pids $p
} else {
- set start_port [expr {$::port+100}]
+ set start_port $::baseport
+ set port_count [expr {$::portcount / $::numclients}]
for {set j 0} {$j < $::numclients} {incr j} {
- set start_port [find_available_port $start_port]
set p [exec $tclsh [info script] {*}$::argv \
- --client $port --port $start_port &]
+ --client $clientport --baseport $start_port --portcount $port_count &]
lappend ::clients_pids $p
- incr start_port 10
+ incr start_port $port_count
}
}
@@ -510,6 +513,10 @@ proc print_help_screen {} {
"--loop Execute the specified set of tests forever."
"--wait-server Wait after server is started (so that you can attach a debugger)."
"--tls Run tests in TLS mode."
+ "--host <addr> Run tests against an external host."
+ "--port <port> TCP port to use against external host."
+ "--baseport <port> Initial port number for spawned redis servers."
+ "--portcount <num> Port range for spawned redis servers."
"--help Print this help screen."
} "\n"]
}
@@ -560,6 +567,12 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
} elseif {$opt eq {--port}} {
set ::port $arg
incr j
+ } elseif {$opt eq {--baseport}} {
+ set ::baseport $arg
+ incr j
+ } elseif {$opt eq {--portcount}} {
+ set ::portcount $arg
+ incr j
} elseif {$opt eq {--accurate}} {
set ::accurate 1
} elseif {$opt eq {--force-failure}} {
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index 7720c055a..20a32e795 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -167,9 +167,9 @@ start_server {tags {"other"}} {
tags {protocol} {
test {PIPELINING stresser (also a regression for the old epoll bug)} {
if {$::tls} {
- set fd2 [::tls::socket $::host $::port]
+ set fd2 [::tls::socket [srv host] [srv port]]
} else {
- set fd2 [socket $::host $::port]
+ set fd2 [socket [srv host] [srv port]]
}
fconfigure $fd2 -encoding binary -translation binary
puts -nonewline $fd2 "SELECT 9\r\n"