diff options
-rw-r--r-- | .github/workflows/daily.yml | 4 | ||||
-rw-r--r-- | src/acl.c | 12 | ||||
-rw-r--r-- | src/cluster.c | 12 | ||||
-rw-r--r-- | src/networking.c | 68 | ||||
-rw-r--r-- | src/replication.c | 107 | ||||
-rw-r--r-- | src/server.c | 4 | ||||
-rw-r--r-- | src/server.h | 4 | ||||
-rw-r--r-- | tests/instances.tcl | 3 | ||||
-rw-r--r-- | tests/integration/psync2-pingoff.tcl | 145 | ||||
-rw-r--r-- | tests/integration/psync2.tcl | 101 | ||||
-rw-r--r-- | tests/modules/Makefile | 5 | ||||
-rw-r--r-- | tests/support/server.tcl | 21 | ||||
-rw-r--r-- | tests/support/util.tcl | 21 | ||||
-rw-r--r-- | tests/test_helper.tcl | 31 | ||||
-rw-r--r-- | tests/unit/other.tcl | 4 |
15 files changed, 268 insertions, 274 deletions
diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index c22d49895..acc4dd33a 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -47,7 +47,9 @@ jobs: sudo apt-get install tcl8.5 ./runtest --accurate --verbose - name: module api test - run: ./runtest-moduleapi --verbose + run: | + make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time + ./runtest-moduleapi --verbose test-tls: runs-on: ubuntu-latest @@ -732,10 +732,11 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * EEXIST: You are adding a key pattern after "*" was already added. This is * almost surely an error on the user side. * ENODEV: The password you are trying to remove from the user does not exist. - * EBADMSG: The hash you are trying to add is not a valid hash. + * EBADMSG: The hash you are trying to add is not a valid hash. */ int ACLSetUser(user *u, const char *op, ssize_t oplen) { if (oplen == -1) oplen = strlen(op); + if (oplen == 0) return C_OK; /* Empty string is a no-operation. */ if (!strcasecmp(op,"on")) { u->flags |= USER_FLAG_ENABLED; u->flags &= ~USER_FLAG_DISABLED; @@ -1297,7 +1298,7 @@ sds ACLLoadFromFile(const char *filename) { if (lines[i][0] == '\0') continue; /* Split into arguments */ - argv = sdssplitargs(lines[i],&argc); + argv = sdssplitlen(lines[i],sdslen(lines[i])," ",1,&argc); if (argv == NULL) { errors = sdscatprintf(errors, "%s:%d: unbalanced quotes in acl line. ", @@ -1329,11 +1330,14 @@ sds ACLLoadFromFile(const char *filename) { continue; } - /* Try to process the line using the fake user to validate iif - * the rules are able to apply cleanly. */ + /* Try to process the line using the fake user to validate if + * the rules are able to apply cleanly. At this stage we also + * trim trailing spaces, so that we don't have to handle that + * in ACLSetUser(). */ ACLSetUser(fakeuser,"reset",-1); int j; for (j = 2; j < argc; j++) { + argv[j] = sdstrim(argv[j],"\t\r\n"); if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) { char *errmsg = ACLSetUserStringError(); errors = sdscatprintf(errors, diff --git a/src/cluster.c b/src/cluster.c index a2fab323a..24b14d1dc 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1463,7 +1463,10 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { } } else { /* If it's not in NOADDR state and we don't have it, we - * start a handshake process against this IP/PORT pairs. + * add it to our trusted dict with exact nodeid and flag. + * Note that we cannot simply start a handshake against + * this IP/PORT pairs, since IP/PORT can be reused already, + * otherwise we risk joining another cluster. * * Note that we require that the sender of this gossip message * is a well known node in our cluster, otherwise we risk @@ -1472,7 +1475,12 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { !(flags & CLUSTER_NODE_NOADDR) && !clusterBlacklistExists(g->nodename)) { - clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport)); + clusterNode *node; + node = createClusterNode(g->nodename, flags); + memcpy(node->ip,g->ip,NET_IP_STR_LEN); + node->port = ntohs(g->port); + node->cport = ntohs(g->cport); + clusterAddNode(node); } } diff --git a/src/networking.c b/src/networking.c index 364654642..8d3e057b7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -396,31 +396,7 @@ void addReplyErrorLength(client *c, const char *s, size_t len) { if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog && server.repl_backlog_histlen > 0) { - long long dumplen = 256; - if (server.repl_backlog_histlen < dumplen) - dumplen = server.repl_backlog_histlen; - - /* Identify the first byte to dump. */ - long long idx = - (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) % - server.repl_backlog_size; - - /* Scan the circular buffer to collect 'dumplen' bytes. */ - sds dump = sdsempty(); - while(dumplen) { - long long thislen = - ((server.repl_backlog_size - idx) < dumplen) ? - (server.repl_backlog_size - idx) : dumplen; - - dump = sdscatrepr(dump,server.repl_backlog+idx,thislen); - dumplen -= thislen; - idx = 0; - } - - /* Finally log such bytes: this is vital debugging info to - * understand what happened. */ - serverLog(LL_WARNING,"Latest backlog is: '%s'", dump); - sdsfree(dump); + showLatestBacklog(); } server.stat_unexpected_error_replies++; } @@ -1037,25 +1013,14 @@ static void freeClientArgv(client *c) { /* Close all the slaves connections. This is useful in chained replication * when we resync with our own master and want to force all our slaves to - * resync with us as well. - * - * If 'async' is non-zero we free the clients asynchronously. This is needed - * when we call this function from a context where in the chain of the - * callers somebody is iterating the list of clients. For instance when - * CLIENT KILL TYPE master is called, caching the master client may - * adjust the meaningful offset of replication, and in turn call - * discionectSlaves(). Since CLIENT KILL iterates the clients this is - * not safe. */ -void disconnectSlaves(int async) { + * resync with us as well. */ +void disconnectSlaves(void) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { listNode *ln = listFirst(server.slaves); - if (async) - freeClientAsync((client*)ln->value); - else - freeClient((client*)ln->value); + freeClient((client*)ln->value); } } @@ -1553,6 +1518,19 @@ int processInlineBuffer(client *c) { if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE) c->repl_ack_time = server.unixtime; + /* Masters should never send us inline protocol to run actual + * commands. If this happens, it is likely due to a bug in Redis where + * we got some desynchronization in the protocol, for example + * beause of a PSYNC gone bad. + * + * However the is an exception: masters may send us just a newline + * to keep the connection active. */ + if (querylen != 0 && c->flags & CLIENT_MASTER) { + serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master."); + setProtocolError("Master using the inline protocol. Desync?",c); + return C_ERR; + } + /* Move querybuffer position to the next query in the buffer. */ c->qb_pos += querylen+linefeed_chars; @@ -1576,7 +1554,7 @@ int processInlineBuffer(client *c) { * CLIENT_PROTOCOL_ERROR. */ #define PROTO_DUMP_LEN 128 static void setProtocolError(const char *errstr, client *c) { - if (server.verbosity <= LL_VERBOSE) { + if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) { sds client = catClientInfoString(sdsempty(),c); /* Sample some protocol to given an idea about what was inside. */ @@ -1595,7 +1573,9 @@ static void setProtocolError(const char *errstr, client *c) { } /* Log all the client and protocol info. */ - serverLog(LL_VERBOSE, + int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING : + LL_VERBOSE; + serverLog(loglevel, "Protocol error (%s) from client: %s. %s", errstr, client, buf); sdsfree(client); } @@ -1754,7 +1734,6 @@ int processMultibulkBuffer(client *c) { * 2. In the case of master clients, the replication offset is updated. * 3. Propagate commands we got from our master to replicas down the line. */ void commandProcessed(client *c) { - int cmd_is_ping = c->cmd && c->cmd->proc == pingCommand; long long prev_offset = c->reploff; if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ @@ -1779,16 +1758,11 @@ void commandProcessed(client *c) { * sub-replicas and to the replication backlog. */ if (c->flags & CLIENT_MASTER) { long long applied = c->reploff - prev_offset; - long long prev_master_repl_meaningful_offset = server.master_repl_meaningful_offset; if (applied) { replicationFeedSlavesFromMasterStream(server.slaves, c->pending_querybuf, applied); sdsrange(c->pending_querybuf,applied,-1); } - /* The server.master_repl_meaningful_offset variable represents - * the offset of the replication stream without the pending PINGs. */ - if (cmd_is_ping) - server.master_repl_meaningful_offset = prev_master_repl_meaningful_offset; } } diff --git a/src/replication.c b/src/replication.c index 2b21b02d8..063a8705e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,7 +39,6 @@ #include <sys/socket.h> #include <sys/stat.h> -long long adjustMeaningfulReplOffset(int *adjusted); void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(connection *conn); void replicationSendAck(void); @@ -163,7 +162,6 @@ void feedReplicationBacklog(void *ptr, size_t len) { unsigned char *p = ptr; server.master_repl_offset += len; - server.master_repl_meaningful_offset = server.master_repl_offset; /* This is a circular buffer, so write as much data we can at every * iteration and rewind the "idx" index if we reach the limit. */ @@ -309,6 +307,40 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } } +/* This is a debugging function that gets called when we detect something + * wrong with the replication protocol: the goal is to peek into the + * replication backlog and show a few final bytes to make simpler to + * guess what kind of bug it could be. */ +void showLatestBacklog(void) { + if (server.repl_backlog == NULL) return; + + long long dumplen = 256; + if (server.repl_backlog_histlen < dumplen) + dumplen = server.repl_backlog_histlen; + + /* Identify the first byte to dump. */ + long long idx = + (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) % + server.repl_backlog_size; + + /* Scan the circular buffer to collect 'dumplen' bytes. */ + sds dump = sdsempty(); + while(dumplen) { + long long thislen = + ((server.repl_backlog_size - idx) < dumplen) ? + (server.repl_backlog_size - idx) : dumplen; + + dump = sdscatrepr(dump,server.repl_backlog+idx,thislen); + dumplen -= thislen; + idx = 0; + } + + /* Finally log such bytes: this is vital debugging info to + * understand what happened. */ + serverLog(LL_WARNING,"Latest backlog is: '%s'", dump); + sdsfree(dump); +} + /* This function is used in order to proxy what we receive from our master * to our sub-slaves. */ #include <ctype.h> @@ -1831,7 +1863,6 @@ void readSyncBulkPayload(connection *conn) { * we are starting a new history. */ memcpy(server.replid,server.master->replid,sizeof(server.replid)); server.master_repl_offset = server.master->reploff; - server.master_repl_meaningful_offset = server.master->reploff; clearReplicationId2(); /* Let's create the replication backlog if needed. Slaves need to @@ -2086,7 +2117,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { memcpy(server.cached_master->replid,new,sizeof(server.replid)); /* Disconnect all the sub-slaves: they need to be notified. */ - disconnectSlaves(0); + disconnectSlaves(); } } @@ -2359,7 +2390,7 @@ void syncWithMaster(connection *conn) { * as well, if we have any sub-slaves. The master may transfer us an * entirely different data set and we have no way to incrementally feed * our slaves after that. */ - disconnectSlaves(0); /* Force our slaves to resync with us as well. */ + disconnectSlaves(); /* Force our slaves to resync with us as well. */ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */ /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC @@ -2506,7 +2537,7 @@ void replicationSetMaster(char *ip, int port) { /* Force our slaves to resync with us as well. They may hopefully be able * to partially resync with us, but we can notify the replid change. */ - disconnectSlaves(0); + disconnectSlaves(); cancelReplicationHandshake(); /* Before destroying our master state, create a cached master using * our own parameters, to later PSYNC with the new master. */ @@ -2557,7 +2588,7 @@ void replicationUnsetMaster(void) { * of the replication ID change (see shiftReplicationId() call). However * the slaves will be able to partially resync with us, so it will be * a very fast reconnection. */ - disconnectSlaves(0); + disconnectSlaves(); server.repl_state = REPL_STATE_NONE; /* We need to make sure the new master will start the replication stream @@ -2759,11 +2790,6 @@ void replicationCacheMaster(client *c) { * pending outputs to the master. */ sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); - - /* Adjust reploff and read_reploff to the last meaningful offset we - * executed. This is the offset the replica will use for future PSYNC. */ - int offset_adjusted; - server.master->reploff = adjustMeaningfulReplOffset(&offset_adjusted); server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); @@ -2786,53 +2812,6 @@ void replicationCacheMaster(client *c) { * so make sure to adjust the replication state. This function will * also set server.master to NULL. */ replicationHandleMasterDisconnection(); - - /* If we trimmed this replica backlog, we need to disconnect our chained - * replicas (if any), otherwise they may have the PINGs we removed - * from the stream and their offset would no longer match: upon - * disconnection they will also trim the final PINGs and will be able - * to incrementally sync without issues. */ - if (offset_adjusted) disconnectSlaves(1); -} - -/* If the "meaningful" offset, that is the offset without the final PINGs - * in the stream, is different than the last offset, use it instead: - * often when the master is no longer reachable, replicas will never - * receive the PINGs, however the master will end with an incremented - * offset because of the PINGs and will not be able to incrementally - * PSYNC with the new master. - * This function trims the replication backlog when needed, and returns - * the offset to be used for future partial sync. - * - * If the integer 'adjusted' was passed by reference, it is set to 1 - * if the function call actually modified the offset and the replication - * backlog, otherwise it is set to 0. It can be NULL if the caller is - * not interested in getting this info. */ -long long adjustMeaningfulReplOffset(int *adjusted) { - if (server.master_repl_offset > server.master_repl_meaningful_offset) { - long long delta = server.master_repl_offset - - server.master_repl_meaningful_offset; - serverLog(LL_NOTICE, - "Using the meaningful offset %lld instead of %lld to exclude " - "the final PINGs (%lld bytes difference)", - server.master_repl_meaningful_offset, - server.master_repl_offset, - delta); - server.master_repl_offset = server.master_repl_meaningful_offset; - if (server.repl_backlog_histlen <= delta) { - server.repl_backlog_histlen = 0; - server.repl_backlog_idx = 0; - } else { - server.repl_backlog_histlen -= delta; - server.repl_backlog_idx = - (server.repl_backlog_idx + (server.repl_backlog_size - delta)) % - server.repl_backlog_size; - } - if (adjusted) *adjusted = 1; - } else { - if (adjusted) *adjusted = 0; - } - return server.master_repl_offset; } /* This function is called when a master is turend into a slave, in order to @@ -2854,7 +2833,7 @@ void replicationCacheMasterUsingMyself(void) { * by replicationCreateMasterClient(). We'll later set the created * master as server.cached_master, so the replica will use such * offset for PSYNC. */ - server.master_initial_offset = adjustMeaningfulReplOffset(NULL); + server.master_initial_offset = server.master_repl_offset; /* The master client we create can be set to any DBID, because * the new master will start its replication stream with SELECT. */ @@ -3246,18 +3225,10 @@ void replicationCron(void) { clientsArePaused(); if (!manual_failover_in_progress) { - long long before_ping = server.master_repl_meaningful_offset; ping_argv[0] = createStringObject("PING",4); replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1); decrRefCount(ping_argv[0]); - /* The server.master_repl_meaningful_offset variable represents - * the offset of the replication stream without the pending PINGs. - * This is useful to set the right replication offset for PSYNC - * when the master is turned into a replica. Otherwise pending - * PINGs may not allow it to perform an incremental sync with the - * new master. */ - server.master_repl_meaningful_offset = before_ping; } } diff --git a/src/server.c b/src/server.c index 5bc4666ee..b7a6a928f 100644 --- a/src/server.c +++ b/src/server.c @@ -2394,7 +2394,6 @@ void initServerConfig(void) { server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; server.repl_down_since = 0; /* Never connected, repl is down since EVER. */ server.master_repl_offset = 0; - server.master_repl_meaningful_offset = 0; /* Replication partial resync backlog */ server.repl_backlog = NULL; @@ -4471,7 +4470,6 @@ sds genRedisInfoString(const char *section) { "master_replid:%s\r\n" "master_replid2:%s\r\n" "master_repl_offset:%lld\r\n" - "master_repl_meaningful_offset:%lld\r\n" "second_repl_offset:%lld\r\n" "repl_backlog_active:%d\r\n" "repl_backlog_size:%lld\r\n" @@ -4480,7 +4478,6 @@ sds genRedisInfoString(const char *section) { server.replid, server.replid2, server.master_repl_offset, - server.master_repl_meaningful_offset, server.second_replid_offset, server.repl_backlog != NULL, server.repl_backlog_size, @@ -4858,7 +4855,6 @@ void loadDataFromDisk(void) { { memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); server.master_repl_offset = rsi.repl_offset; - server.master_repl_meaningful_offset = rsi.repl_offset; /* If we are a slave, create a cached master from this * information, in order to allow partial resynchronizations * with masters. */ diff --git a/src/server.h b/src/server.h index 2d17d69c8..a08585292 100644 --- a/src/server.h +++ b/src/server.h @@ -1261,7 +1261,6 @@ struct redisServer { char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/ long long master_repl_offset; /* My current replication offset */ - long long master_repl_meaningful_offset; /* Offset minus latest PINGs. */ long long second_replid_offset; /* Accept offsets up to this for replid2. */ int slaveseldb; /* Last SELECTed DB in replication output */ int repl_ping_slave_period; /* Master pings the slave every N seconds */ @@ -1660,7 +1659,7 @@ int getClientType(client *c); int getClientTypeByName(char *name); char *getClientTypeName(int class); void flushSlavesOutputBuffers(void); -void disconnectSlaves(int async); +void disconnectSlaves(void); int listenToPort(int port, int *fds, int *count); void pauseClients(mstime_t duration); int clientsArePaused(void); @@ -1811,6 +1810,7 @@ void clearReplicationId2(void); void chopReplicationBacklog(void); void replicationCacheMasterUsingMyself(void); void feedReplicationBacklog(void *ptr, size_t len); +void showLatestBacklog(void); void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask); void rdbPipeWriteHandlerConnRemoved(struct connection *conn); diff --git a/tests/instances.tcl b/tests/instances.tcl index 0a0cbab12..3a4fadca0 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -25,6 +25,7 @@ set ::sentinel_instances {} set ::redis_instances {} set ::sentinel_base_port 20000 set ::redis_base_port 30000 +set ::redis_port_count 1024 set ::pids {} ; # We kill everything at exit set ::dirs {} ; # We remove all the temp dirs at exit set ::run_matching {} ; # If non empty, only tests matching pattern are run. @@ -57,7 +58,7 @@ proc exec_instance {type cfgfile} { # Spawn a redis or sentinel instance, depending on 'type'. proc spawn_instance {type base_port count {conf {}}} { for {set j 0} {$j < $count} {incr j} { - set port [find_available_port $base_port] + set port [find_available_port $base_port $::redis_port_count] incr base_port puts "Starting $type #$j at port $port" diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl index 2c2303141..5a9a46d16 100644 --- a/tests/integration/psync2-pingoff.tcl +++ b/tests/integration/psync2-pingoff.tcl @@ -1,6 +1,9 @@ -# Test the meaningful offset implementation to make sure masters -# are able to PSYNC with replicas even if the replication stream -# has pending PINGs at the end. +# These tests were added together with the meaningful offset implementation +# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that +# servers are able to PSYNC with replicas even if the replication stream has +# PINGs at the end which present in one sever and missing on another. +# We keep these tests just because they reproduce edge cases in the replication +# logic in hope they'll be able to spot some problem in the future. start_server {tags {"psync2"}} { start_server {} { @@ -16,7 +19,7 @@ start_server {} { } # Setup replication - test "PSYNC2 meaningful offset: setup" { + test "PSYNC2 pingoff: setup" { $R(1) replicaof $R_host(0) $R_port(0) $R(0) set foo bar wait_for_condition 50 1000 { @@ -27,7 +30,7 @@ start_server {} { } } - test "PSYNC2 meaningful offset: write and wait replication" { + test "PSYNC2 pingoff: write and wait replication" { $R(0) INCR counter $R(0) INCR counter $R(0) INCR counter @@ -41,7 +44,7 @@ start_server {} { # In this test we'll make sure the replica will get stuck, but with # an active connection: this way the master will continue to send PINGs # every second (we modified the PING period earlier) - test "PSYNC2 meaningful offset: pause replica and promote it" { + test "PSYNC2 pingoff: pause replica and promote it" { $R(1) MULTI $R(1) DEBUG SLEEP 5 $R(1) SLAVEOF NO ONE @@ -50,14 +53,22 @@ start_server {} { } test "Make the old master a replica of the new one and check conditions" { - set sync_partial [status $R(1) sync_partial_ok] - assert {$sync_partial == 0} + assert_equal [status $R(1) sync_full] 0 $R(0) REPLICAOF $R_host(1) $R_port(1) wait_for_condition 50 1000 { - [status $R(1) sync_partial_ok] == 1 + [status $R(1) sync_full] == 1 } else { - fail "The new master was not able to partial sync" + fail "The new master was not able to sync" } + + # make sure replication is still alive and kicking + $R(1) incr x + wait_for_condition 50 1000 { + [$R(0) get x] == 1 + } else { + fail "replica didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] } }} @@ -65,6 +76,106 @@ start_server {} { start_server {tags {"psync2"}} { start_server {} { start_server {} { +start_server {} { +start_server {} { + test {test various edge cases of repl topology changes with missing pings at the end} { + set master [srv -4 client] + set master_host [srv -4 host] + set master_port [srv -4 port] + set replica1 [srv -3 client] + set replica2 [srv -2 client] + set replica3 [srv -1 client] + set replica4 [srv -0 client] + + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + $replica3 replicaof $master_host $master_port + $replica4 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $master connected_slaves] == 4 + } else { + fail "replicas didn't connect" + } + + $master incr x + wait_for_condition 50 1000 { + [$replica1 get x] == 1 && [$replica2 get x] == 1 && + [$replica3 get x] == 1 && [$replica4 get x] == 1 + } else { + fail "replicas didn't get incr" + } + + # disconnect replica1 and replica2 + # and wait for the master to send a ping to replica3 and replica4 + $replica1 replicaof no one + $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id + $master config set repl-ping-replica-period 1 + after 1500 + + # make everyone sync from the replica1 that didn't get the last ping from the old master + # replica4 will keep syncing from the old master which now syncs from replica1 + # and replica2 will re-connect to the old master (which went back in time) + set new_master_host [srv -3 host] + set new_master_port [srv -3 port] + $replica3 replicaof $new_master_host $new_master_port + $master replicaof $new_master_host $new_master_port + $replica2 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $replica2 master_link_status] == "up" && + [status $replica3 master_link_status] == "up" && + [status $replica4 master_link_status] == "up" && + [status $master master_link_status] == "up" + } else { + fail "replicas didn't connect" + } + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 2 && + [$replica3 get x] == 2 && + [$replica4 get x] == 2 && + [$master get x] == 2 + } else { + fail "replicas didn't get incr" + } + + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 + + # force psync + $master client kill type master + $replica2 client kill type master + $replica3 client kill type master + $replica4 client kill type master + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 3 && + [$replica3 get x] == 3 && + [$replica4 get x] == 3 && + [$master get x] == 3 + } else { + fail "replicas didn't get incr" + } + + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 +} +}}}}} + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { for {set j 0} {$j < 3} {incr j} { set R($j) [srv [expr 0-$j] client] @@ -97,7 +208,7 @@ start_server {} { [status $R(1) master_link_status] == "up" && [status $R(2) master_link_status] == "up" && [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && - [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 + [status $R(1) sync_partial_ok] == $sync_partial_replica } else { fail "Disconnected replica failed to PSYNC with master" } @@ -106,7 +217,15 @@ start_server {} { # offsets match with the master assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] + + # make sure replication is still alive and kicking + $R(0) incr counter2 + wait_for_condition 50 1000 { + [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2 + } else { + fail "replicas didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] } }}} diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 29a880f99..3f636463a 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -242,7 +242,6 @@ start_server {} { show_cluster_status fail "Replicas and master offsets were unable to match *exactly*." } - $R($master_id) config set repl-ping-replica-period 10 # Limit anyway the maximum number of cycles. This is useful when the # test is skipped via --only option of the test suite. In that case @@ -370,103 +369,3 @@ start_server {} { } }}}}} - -start_server {tags {"psync2"}} { -start_server {} { -start_server {} { -start_server {} { -start_server {} { - test {pings at the end of replication stream are ignored for psync} { - set master [srv -4 client] - set master_host [srv -4 host] - set master_port [srv -4 port] - set replica1 [srv -3 client] - set replica2 [srv -2 client] - set replica3 [srv -1 client] - set replica4 [srv -0 client] - - $replica1 replicaof $master_host $master_port - $replica2 replicaof $master_host $master_port - $replica3 replicaof $master_host $master_port - $replica4 replicaof $master_host $master_port - wait_for_condition 50 1000 { - [status $master connected_slaves] == 4 - } else { - fail "replicas didn't connect" - } - - $master incr x - wait_for_condition 50 1000 { - [$replica1 get x] == 1 && [$replica2 get x] == 1 && - [$replica3 get x] == 1 && [$replica4 get x] == 1 - } else { - fail "replicas didn't get incr" - } - - # disconnect replica1 and replica2 - # and wait for the master to send a ping to replica3 and replica4 - $replica1 replicaof no one - $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id - $master config set repl-ping-replica-period 1 - after 1500 - - # make everyone sync from the replica1 that didn't get the last ping from the old master - # replica4 will keep syncing from the old master which now syncs from replica1 - # and replica2 will re-connect to the old master (which went back in time) - set new_master_host [srv -3 host] - set new_master_port [srv -3 port] - $replica3 replicaof $new_master_host $new_master_port - $master replicaof $new_master_host $new_master_port - $replica2 replicaof $master_host $master_port - wait_for_condition 50 1000 { - [status $replica2 master_link_status] == "up" && - [status $replica3 master_link_status] == "up" && - [status $replica4 master_link_status] == "up" && - [status $master master_link_status] == "up" - } else { - fail "replicas didn't connect" - } - - # make sure replication is still alive and kicking - $replica1 incr x - wait_for_condition 50 1000 { - [$replica2 get x] == 2 && - [$replica3 get x] == 2 && - [$replica4 get x] == 2 && - [$master get x] == 2 - } else { - fail "replicas didn't get incr" - } - - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 - assert_equal [status $replica2 sync_full] 0 - assert_equal [status $replica3 sync_full] 0 - assert_equal [status $replica4 sync_full] 0 - - # force psync - $master client kill type master - $replica2 client kill type master - $replica3 client kill type master - $replica4 client kill type master - - # make sure replication is still alive and kicking - $replica1 incr x - wait_for_condition 50 1000 { - [$replica2 get x] == 3 && - [$replica3 get x] == 3 && - [$replica4 get x] == 3 && - [$master get x] == 3 - } else { - fail "replicas didn't get incr" - } - - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 - assert_equal [status $replica2 sync_full] 0 - assert_equal [status $replica3 sync_full] 0 - assert_equal [status $replica4 sync_full] 0 -} -}}}}} diff --git a/tests/modules/Makefile b/tests/modules/Makefile index 363231a87..39b8e6efa 100644 --- a/tests/modules/Makefile +++ b/tests/modules/Makefile @@ -28,11 +28,14 @@ TEST_MODULES = \ all: $(TEST_MODULES) +32bit: + $(MAKE) CFLAGS="-m32" LDFLAGS="-melf_i386" + %.xo: %.c ../../src/redismodule.h $(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ %.so: %.xo - $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LDFLAGS) $(LIBS) -lc .PHONY: clean diff --git a/tests/support/server.tcl b/tests/support/server.tcl index d086366dc..146ebc72c 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -214,14 +214,14 @@ proc start_server {options {code undefined}} { dict set config dir [tmpdir server] # start every server on a different port - set ::port [find_available_port [expr {$::port+1}]] + set port [find_available_port $::baseport $::portcount] if {$::tls} { dict set config "port" 0 - dict set config "tls-port" $::port + dict set config "tls-port" $port dict set config "tls-cluster" "yes" dict set config "tls-replication" "yes" } else { - dict set config port $::port + dict set config port $port } set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]] @@ -243,10 +243,10 @@ proc start_server {options {code undefined}} { set server_started 0 while {$server_started == 0} { if {$::verbose} { - puts -nonewline "=== ($tags) Starting server ${::host}:${::port} " + puts -nonewline "=== ($tags) Starting server ${::host}:${port} " } - send_data_packet $::test_server_fd "server-spawning" "port $::port" + send_data_packet $::test_server_fd "server-spawning" "port $port" if {$::valgrind} { set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &] @@ -291,19 +291,19 @@ proc start_server {options {code undefined}} { # for availability. Other test clients may grab the port before we # are able to do it for example. if {$port_busy} { - puts "Port $::port was already busy, trying another port..." - set ::port [find_available_port [expr {$::port+1}]] + puts "Port $port was already busy, trying another port..." + set port [find_available_port $::baseport $::portcount] if {$::tls} { - dict set config "tls-port" $::port + dict set config "tls-port" $port } else { - dict set config port $::port + dict set config port $port } create_server_config_file $config_file $config continue; # Try again } if {$code ne "undefined"} { - set serverisup [server_is_up $::host $::port $retrynum] + set serverisup [server_is_up $::host $port $retrynum] } else { set serverisup 1 } @@ -324,7 +324,6 @@ proc start_server {options {code undefined}} { # setup properties to be able to initialize a client object set port_param [expr $::tls ? {"tls-port"} : {"port"}] set host $::host - set port $::port if {[dict exists $config bind]} { set host [dict get $config bind] } if {[dict exists $config $port_param]} { set port [dict get $config $port_param] } diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 7ecf5b79c..8bec95374 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -344,21 +344,26 @@ proc roundFloat f { format "%.10g" $f } -proc find_available_port start { - for {set j $start} {$j < $start+1024} {incr j} { - if {[catch {set fd1 [socket 127.0.0.1 $j]}] && - [catch {set fd2 [socket 127.0.0.1 [expr $j+10000]]}]} { - return $j +set ::last_port_attempted 0 +proc find_available_port {start count} { + set port [expr $::last_port_attempted + 1] + for {set attempts 0} {$attempts < $count} {incr attempts} { + if {$port < $start || $port >= $start+$count} { + set port $start + } + if {[catch {set fd1 [socket 127.0.0.1 $port]}] && + [catch {set fd2 [socket 127.0.0.1 [expr $port+10000]]}]} { + set ::last_port_attempted $port + return $port } else { catch { close $fd1 close $fd2 } } + incr port } - if {$j == $start+1024} { - error "Can't find a non busy port in the $start-[expr {$start+1023}] range." - } + error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range." } # Test if TERM looks like to support colors diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index de0a64728..fba54acb5 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -65,12 +65,15 @@ set ::all_tests { unit/wait unit/pendingquerybuf unit/tls + unit/tracking } # Index to the next test to run in the ::all_tests list. set ::next_test 0 set ::host 127.0.0.1 -set ::port 21111 +set ::port 6379; # port for external server +set ::baseport 21111; # initial port for spawned redis servers +set ::portcount 8000; # we don't wanna use more than 10000 to avoid collision with cluster bus ports set ::traceleaks 0 set ::valgrind 0 set ::tls 0 @@ -228,26 +231,26 @@ proc test_server_main {} { set tclsh [info nameofexecutable] # Open a listening socket, trying different ports in order to find a # non busy one. - set port [find_available_port 11111] + set clientport [find_available_port 11111 32] if {!$::quiet} { - puts "Starting test server at port $port" + puts "Starting test server at port $clientport" } - socket -server accept_test_clients -myaddr 127.0.0.1 $port + socket -server accept_test_clients -myaddr 127.0.0.1 $clientport # Start the client instances set ::clients_pids {} if {$::external} { set p [exec $tclsh [info script] {*}$::argv \ - --client $port --port $::port &] + --client $clientport &] lappend ::clients_pids $p } else { - set start_port [expr {$::port+100}] + set start_port $::baseport + set port_count [expr {$::portcount / $::numclients}] for {set j 0} {$j < $::numclients} {incr j} { - set start_port [find_available_port $start_port] set p [exec $tclsh [info script] {*}$::argv \ - --client $port --port $start_port &] + --client $clientport --baseport $start_port --portcount $port_count &] lappend ::clients_pids $p - incr start_port 10 + incr start_port $port_count } } @@ -510,6 +513,10 @@ proc print_help_screen {} { "--loop Execute the specified set of tests forever." "--wait-server Wait after server is started (so that you can attach a debugger)." "--tls Run tests in TLS mode." + "--host <addr> Run tests against an external host." + "--port <port> TCP port to use against external host." + "--baseport <port> Initial port number for spawned redis servers." + "--portcount <num> Port range for spawned redis servers." "--help Print this help screen." } "\n"] } @@ -560,6 +567,12 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--port}} { set ::port $arg incr j + } elseif {$opt eq {--baseport}} { + set ::baseport $arg + incr j + } elseif {$opt eq {--portcount}} { + set ::portcount $arg + incr j } elseif {$opt eq {--accurate}} { set ::accurate 1 } elseif {$opt eq {--force-failure}} { diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 7720c055a..20a32e795 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -167,9 +167,9 @@ start_server {tags {"other"}} { tags {protocol} { test {PIPELINING stresser (also a regression for the old epoll bug)} { if {$::tls} { - set fd2 [::tls::socket $::host $::port] + set fd2 [::tls::socket [srv host] [srv port]] } else { - set fd2 [socket $::host $::port] + set fd2 [socket [srv host] [srv port]] } fconfigure $fd2 -encoding binary -translation binary puts -nonewline $fd2 "SELECT 9\r\n" |