15 files changed, 268 insertions, 274 deletions
diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index c22d49895..acc4dd33a 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -47,7 +47,9 @@ jobs:
         sudo apt-get install tcl8.5
         ./runtest --accurate --verbose
     - name: module api test
-      run: ./runtest-moduleapi --verbose
+      run: |
+        make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time
+        ./runtest-moduleapi --verbose
 
   test-tls:
     runs-on: ubuntu-latest
diff --git a/src/acl.c b/src/acl.c
index c3a724c33..6dd0f70ac 100644
--- a/src/acl.c
+++ b/src/acl.c
@@ -732,10 +732,11 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) {
  * EEXIST: You are adding a key pattern after "*" was already added. This is
  *         almost surely an error on the user side.
  * ENODEV: The password you are trying to remove from the user does not exist.
- * EBADMSG: The hash you are trying to add is not a valid hash. 
+ * EBADMSG: The hash you are trying to add is not a valid hash.
  */
 int ACLSetUser(user *u, const char *op, ssize_t oplen) {
     if (oplen == -1) oplen = strlen(op);
+    if (oplen == 0) return C_OK; /* Empty string is a no-operation. */
     if (!strcasecmp(op,"on")) {
         u->flags |= USER_FLAG_ENABLED;
         u->flags &= ~USER_FLAG_DISABLED;
@@ -1297,7 +1298,7 @@ sds ACLLoadFromFile(const char *filename) {
         if (lines[i][0] == '\0') continue;
 
         /* Split into arguments */
-        argv = sdssplitargs(lines[i],&argc);
+        argv = sdssplitlen(lines[i],sdslen(lines[i])," ",1,&argc);
         if (argv == NULL) {
             errors = sdscatprintf(errors,
                      "%s:%d: unbalanced quotes in acl line. ",
@@ -1329,11 +1330,14 @@ sds ACLLoadFromFile(const char *filename) {
             continue;
         }
 
-        /* Try to process the line using the fake user to validate iif
-         * the rules are able to apply cleanly. */
+        /* Try to process the line using the fake user to validate if
+         * the rules are able to apply cleanly. At this stage we also
+         * trim trailing spaces, so that we don't have to handle that
+         * in ACLSetUser(). */
         ACLSetUser(fakeuser,"reset",-1);
         int j;
         for (j = 2; j < argc; j++) {
+            argv[j] = sdstrim(argv[j],"\t\r\n");
             if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) {
                 char *errmsg = ACLSetUserStringError();
                 errors = sdscatprintf(errors,
diff --git a/src/cluster.c b/src/cluster.c
index a2fab323a..24b14d1dc 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1463,7 +1463,10 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
             }
         } else {
             /* If it's not in NOADDR state and we don't have it, we
-             * start a handshake process against this IP/PORT pairs.
+             * add it to our trusted dict with exact nodeid and flag.
+             * Note that we cannot simply start a handshake against
+             * this IP/PORT pairs, since IP/PORT can be reused already,
+             * otherwise we risk joining another cluster.
              *
              * Note that we require that the sender of this gossip message
              * is a well known node in our cluster, otherwise we risk
@@ -1472,7 +1475,12 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
                 !(flags & CLUSTER_NODE_NOADDR) &&
                 !clusterBlacklistExists(g->nodename))
             {
-                clusterStartHandshake(g->ip,ntohs(g->port),ntohs(g->cport));
+                clusterNode *node;
+                node = createClusterNode(g->nodename, flags);
+                memcpy(node->ip,g->ip,NET_IP_STR_LEN);
+                node->port = ntohs(g->port);
+                node->cport = ntohs(g->cport);
+                clusterAddNode(node);
             }
         }
 
diff --git a/src/networking.c b/src/networking.c
index 364654642..8d3e057b7 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -396,31 +396,7 @@ void addReplyErrorLength(client *c, const char *s, size_t len) {
         if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog &&
             server.repl_backlog_histlen > 0)
         {
-            long long dumplen = 256;
-            if (server.repl_backlog_histlen < dumplen)
-                dumplen = server.repl_backlog_histlen;
-
-            /* Identify the first byte to dump. */
-            long long idx =
-              (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) %
-               server.repl_backlog_size;
-
-            /* Scan the circular buffer to collect 'dumplen' bytes. */
-            sds dump = sdsempty();
-            while(dumplen) {
-                long long thislen =
-                    ((server.repl_backlog_size - idx) < dumplen) ?
-                    (server.repl_backlog_size - idx) : dumplen;
-
-                dump = sdscatrepr(dump,server.repl_backlog+idx,thislen);
-                dumplen -= thislen;
-                idx = 0;
-            }
-
-            /* Finally log such bytes: this is vital debugging info to
-             * understand what happened. */
-            serverLog(LL_WARNING,"Latest backlog is: '%s'", dump);
-            sdsfree(dump);
+            showLatestBacklog();
         }
         server.stat_unexpected_error_replies++;
     }
@@ -1037,25 +1013,14 @@ static void freeClientArgv(client *c) {
 
 /* Close all the slaves connections. This is useful in chained replication
  * when we resync with our own master and want to force all our slaves to
- * resync with us as well.
- *
- * If 'async' is non-zero we free the clients asynchronously. This is needed
- * when we call this function from a context where in the chain of the
- * callers somebody is iterating the list of clients. For instance when
- * CLIENT KILL TYPE master is called, caching the master client may
- * adjust the meaningful offset of replication, and in turn call
- * discionectSlaves(). Since CLIENT KILL iterates the clients this is
- * not safe. */
-void disconnectSlaves(int async) {
+ * resync with us as well. */
+void disconnectSlaves(void) {
     listIter li;
     listNode *ln;
     listRewind(server.slaves,&li);
     while((ln = listNext(&li))) {
         listNode *ln = listFirst(server.slaves);
-        if (async)
-            freeClientAsync((client*)ln->value);
-        else
-            freeClient((client*)ln->value);
+        freeClient((client*)ln->value);
     }
 }
 
@@ -1553,6 +1518,19 @@ int processInlineBuffer(client *c) {
     if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE)
         c->repl_ack_time = server.unixtime;
 
+    /* Masters should never send us inline protocol to run actual
+     * commands. If this happens, it is likely due to a bug in Redis where
+     * we got some desynchronization in the protocol, for example
+     * beause of a PSYNC gone bad.
+     *
+     * However the is an exception: masters may send us just a newline
+     * to keep the connection active. */
+    if (querylen != 0 && c->flags & CLIENT_MASTER) {
+        serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
+        setProtocolError("Master using the inline protocol. Desync?",c);
+        return C_ERR;
+    }
+
     /* Move querybuffer position to the next query in the buffer. */
     c->qb_pos += querylen+linefeed_chars;
 
@@ -1576,7 +1554,7 @@ int processInlineBuffer(client *c) {
  * CLIENT_PROTOCOL_ERROR. */
 #define PROTO_DUMP_LEN 128
 static void setProtocolError(const char *errstr, client *c) {
-    if (server.verbosity <= LL_VERBOSE) {
+    if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) {
         sds client = catClientInfoString(sdsempty(),c);
 
         /* Sample some protocol to given an idea about what was inside. */
@@ -1595,7 +1573,9 @@ static void setProtocolError(const char *errstr, client *c) {
         }
 
         /* Log all the client and protocol info. */
-        serverLog(LL_VERBOSE,
+        int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING :
+                                                    LL_VERBOSE;
+        serverLog(loglevel,
             "Protocol error (%s) from client: %s. %s", errstr, client, buf);
         sdsfree(client);
     }
@@ -1754,7 +1734,6 @@ int processMultibulkBuffer(client *c) {
  * 2. In the case of master clients, the replication offset is updated.
  * 3. Propagate commands we got from our master to replicas down the line. */
 void commandProcessed(client *c) {
-    int cmd_is_ping = c->cmd && c->cmd->proc == pingCommand;
     long long prev_offset = c->reploff;
     if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
         /* Update the applied replication offset of our master. */
@@ -1779,16 +1758,11 @@ void commandProcessed(client *c) {
      * sub-replicas and to the replication backlog. */
     if (c->flags & CLIENT_MASTER) {
         long long applied = c->reploff - prev_offset;
-        long long prev_master_repl_meaningful_offset = server.master_repl_meaningful_offset;
         if (applied) {
             replicationFeedSlavesFromMasterStream(server.slaves,
                     c->pending_querybuf, applied);
             sdsrange(c->pending_querybuf,applied,-1);
         }
-        /* The server.master_repl_meaningful_offset variable represents
-         * the offset of the replication stream without the pending PINGs. */
-        if (cmd_is_ping)
-            server.master_repl_meaningful_offset = prev_master_repl_meaningful_offset;
     }
 }
 
diff --git a/src/replication.c b/src/replication.c
index 2b21b02d8..063a8705e 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -39,7 +39,6 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 
-long long adjustMeaningfulReplOffset(int *adjusted);
 void replicationDiscardCachedMaster(void);
 void replicationResurrectCachedMaster(connection *conn);
 void replicationSendAck(void);
@@ -163,7 +162,6 @@ void feedReplicationBacklog(void *ptr, size_t len) {
     unsigned char *p = ptr;
 
     server.master_repl_offset += len;
-    server.master_repl_meaningful_offset = server.master_repl_offset;
 
     /* This is a circular buffer, so write as much data we can at every
      * iteration and rewind the "idx" index if we reach the limit. */
@@ -309,6 +307,40 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
     }
 }
 
+/* This is a debugging function that gets called when we detect something
+ * wrong with the replication protocol: the goal is to peek into the
+ * replication backlog and show a few final bytes to make simpler to
+ * guess what kind of bug it could be. */
+void showLatestBacklog(void) {
+    if (server.repl_backlog == NULL) return;
+
+    long long dumplen = 256;
+    if (server.repl_backlog_histlen < dumplen)
+        dumplen = server.repl_backlog_histlen;
+
+    /* Identify the first byte to dump. */
+    long long idx =
+      (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) %
+       server.repl_backlog_size;
+
+    /* Scan the circular buffer to collect 'dumplen' bytes. */
+    sds dump = sdsempty();
+    while(dumplen) {
+        long long thislen =
+            ((server.repl_backlog_size - idx) < dumplen) ?
+            (server.repl_backlog_size - idx) : dumplen;
+
+        dump = sdscatrepr(dump,server.repl_backlog+idx,thislen);
+        dumplen -= thislen;
+        idx = 0;
+    }
+
+    /* Finally log such bytes: this is vital debugging info to
+     * understand what happened. */
+    serverLog(LL_WARNING,"Latest backlog is: '%s'", dump);
+    sdsfree(dump);
+}
+
 /* This function is used in order to proxy what we receive from our master
  * to our sub-slaves. */
 #include <ctype.h>
@@ -1831,7 +1863,6 @@ void readSyncBulkPayload(connection *conn) {
      * we are starting a new history. */
     memcpy(server.replid,server.master->replid,sizeof(server.replid));
     server.master_repl_offset = server.master->reploff;
-    server.master_repl_meaningful_offset = server.master->reploff;
     clearReplicationId2();
 
     /* Let's create the replication backlog if needed. Slaves need to
@@ -2086,7 +2117,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
                 memcpy(server.cached_master->replid,new,sizeof(server.replid));
 
                 /* Disconnect all the sub-slaves: they need to be notified. */
-                disconnectSlaves(0);
+                disconnectSlaves();
             }
         }
 
@@ -2359,7 +2390,7 @@ void syncWithMaster(connection *conn) {
      * as well, if we have any sub-slaves. The master may transfer us an
      * entirely different data set and we have no way to incrementally feed
      * our slaves after that. */
-    disconnectSlaves(0); /* Force our slaves to resync with us as well. */
+    disconnectSlaves(); /* Force our slaves to resync with us as well. */
     freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
 
     /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
@@ -2506,7 +2537,7 @@ void replicationSetMaster(char *ip, int port) {
 
     /* Force our slaves to resync with us as well. They may hopefully be able
      * to partially resync with us, but we can notify the replid change. */
-    disconnectSlaves(0);
+    disconnectSlaves();
     cancelReplicationHandshake();
     /* Before destroying our master state, create a cached master using
      * our own parameters, to later PSYNC with the new master. */
@@ -2557,7 +2588,7 @@ void replicationUnsetMaster(void) {
      * of the replication ID change (see shiftReplicationId() call). However
      * the slaves will be able to partially resync with us, so it will be
      * a very fast reconnection. */
-    disconnectSlaves(0);
+    disconnectSlaves();
     server.repl_state = REPL_STATE_NONE;
 
     /* We need to make sure the new master will start the replication stream
@@ -2759,11 +2790,6 @@ void replicationCacheMaster(client *c) {
      * pending outputs to the master. */
     sdsclear(server.master->querybuf);
     sdsclear(server.master->pending_querybuf);
-
-    /* Adjust reploff and read_reploff to the last meaningful offset we
-     * executed. This is the offset the replica will use for future PSYNC. */
-    int offset_adjusted;
-    server.master->reploff = adjustMeaningfulReplOffset(&offset_adjusted);
     server.master->read_reploff = server.master->reploff;
     if (c->flags & CLIENT_MULTI) discardTransaction(c);
     listEmpty(c->reply);
@@ -2786,53 +2812,6 @@ void replicationCacheMaster(client *c) {
      * so make sure to adjust the replication state. This function will
      * also set server.master to NULL. */
     replicationHandleMasterDisconnection();
-
-    /* If we trimmed this replica backlog, we need to disconnect our chained
-     * replicas (if any), otherwise they may have the PINGs we removed
-     * from the stream and their offset would no longer match: upon
-     * disconnection they will also trim the final PINGs and will be able
-     * to incrementally sync without issues. */
-    if (offset_adjusted) disconnectSlaves(1);
-}
-
-/* If the "meaningful" offset, that is the offset without the final PINGs
- * in the stream, is different than the last offset, use it instead:
- * often when the master is no longer reachable, replicas will never
- * receive the PINGs, however the master will end with an incremented
- * offset because of the PINGs and will not be able to incrementally
- * PSYNC with the new master.
- * This function trims the replication backlog when needed, and returns
- * the offset to be used for future partial sync.
- *
- * If the integer 'adjusted' was passed by reference, it is set to 1
- * if the function call actually modified the offset and the replication
- * backlog, otherwise it is set to 0. It can be NULL if the caller is
- * not interested in getting this info. */
-long long adjustMeaningfulReplOffset(int *adjusted) {
-    if (server.master_repl_offset > server.master_repl_meaningful_offset) {
-        long long delta = server.master_repl_offset -
-                          server.master_repl_meaningful_offset;
-        serverLog(LL_NOTICE,
-            "Using the meaningful offset %lld instead of %lld to exclude "
-            "the final PINGs (%lld bytes difference)",
-                server.master_repl_meaningful_offset,
-                server.master_repl_offset,
-                delta);
-        server.master_repl_offset = server.master_repl_meaningful_offset;
-        if (server.repl_backlog_histlen <= delta) {
-            server.repl_backlog_histlen = 0;
-            server.repl_backlog_idx = 0;
-        } else {
-            server.repl_backlog_histlen -= delta;
-            server.repl_backlog_idx =
-                (server.repl_backlog_idx + (server.repl_backlog_size - delta)) %
-                server.repl_backlog_size;
-        }
-        if (adjusted) *adjusted = 1;
-    } else {
-        if (adjusted) *adjusted = 0;
-    }
-    return server.master_repl_offset;
 }
 
 /* This function is called when a master is turend into a slave, in order to
@@ -2854,7 +2833,7 @@ void replicationCacheMasterUsingMyself(void) {
      * by replicationCreateMasterClient(). We'll later set the created
      * master as server.cached_master, so the replica will use such
      * offset for PSYNC. */
-    server.master_initial_offset = adjustMeaningfulReplOffset(NULL);
+    server.master_initial_offset = server.master_repl_offset;
 
     /* The master client we create can be set to any DBID, because
      * the new master will start its replication stream with SELECT. */
@@ -3246,18 +3225,10 @@ void replicationCron(void) {
             clientsArePaused();
 
         if (!manual_failover_in_progress) {
-            long long before_ping = server.master_repl_meaningful_offset;
             ping_argv[0] = createStringObject("PING",4);
             replicationFeedSlaves(server.slaves, server.slaveseldb,
                 ping_argv, 1);
             decrRefCount(ping_argv[0]);
-            /* The server.master_repl_meaningful_offset variable represents
-             * the offset of the replication stream without the pending PINGs.
-             * This is useful to set the right replication offset for PSYNC
-             * when the master is turned into a replica. Otherwise pending
-             * PINGs may not allow it to perform an incremental sync with the
-             * new master. */
-            server.master_repl_meaningful_offset = before_ping;
         }
     }
 
diff --git a/src/server.c b/src/server.c
index 5bc4666ee..b7a6a928f 100644
--- a/src/server.c
+++ b/src/server.c
@@ -2394,7 +2394,6 @@ void initServerConfig(void) {
     server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
     server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
     server.master_repl_offset = 0;
-    server.master_repl_meaningful_offset = 0;
 
     /* Replication partial resync backlog */
     server.repl_backlog = NULL;
@@ -4471,7 +4470,6 @@ sds genRedisInfoString(const char *section) {
             "master_replid:%s\r\n"
             "master_replid2:%s\r\n"
             "master_repl_offset:%lld\r\n"
-            "master_repl_meaningful_offset:%lld\r\n"
             "second_repl_offset:%lld\r\n"
             "repl_backlog_active:%d\r\n"
             "repl_backlog_size:%lld\r\n"
@@ -4480,7 +4478,6 @@ sds genRedisInfoString(const char *section) {
             server.replid,
             server.replid2,
             server.master_repl_offset,
-            server.master_repl_meaningful_offset,
             server.second_replid_offset,
             server.repl_backlog != NULL,
             server.repl_backlog_size,
@@ -4858,7 +4855,6 @@ void loadDataFromDisk(void) {
             {
                 memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
                 server.master_repl_offset = rsi.repl_offset;
-                server.master_repl_meaningful_offset = rsi.repl_offset;
                 /* If we are a slave, create a cached master from this
                  * information, in order to allow partial resynchronizations
                  * with masters. */
diff --git a/src/server.h b/src/server.h
index 2d17d69c8..a08585292 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1261,7 +1261,6 @@ struct redisServer {
     char replid[CONFIG_RUN_ID_SIZE+1];  /* My current replication ID. */
     char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
     long long master_repl_offset;   /* My current replication offset */
-    long long master_repl_meaningful_offset; /* Offset minus latest PINGs. */
     long long second_replid_offset; /* Accept offsets up to this for replid2. */
     int slaveseldb;                 /* Last SELECTed DB in replication output */
     int repl_ping_slave_period;     /* Master pings the slave every N seconds */
@@ -1660,7 +1659,7 @@ int getClientType(client *c);
 int getClientTypeByName(char *name);
 char *getClientTypeName(int class);
 void flushSlavesOutputBuffers(void);
-void disconnectSlaves(int async);
+void disconnectSlaves(void);
 int listenToPort(int port, int *fds, int *count);
 void pauseClients(mstime_t duration);
 int clientsArePaused(void);
@@ -1811,6 +1810,7 @@ void clearReplicationId2(void);
 void chopReplicationBacklog(void);
 void replicationCacheMasterUsingMyself(void);
 void feedReplicationBacklog(void *ptr, size_t len);
+void showLatestBacklog(void);
 void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
 void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
 
diff --git a/tests/instances.tcl b/tests/instances.tcl
index 0a0cbab12..3a4fadca0 100644
--- a/tests/instances.tcl
+++ b/tests/instances.tcl
@@ -25,6 +25,7 @@ set ::sentinel_instances {}
 set ::redis_instances {}
 set ::sentinel_base_port 20000
 set ::redis_base_port 30000
+set ::redis_port_count 1024
 set ::pids {} ; # We kill everything at exit
 set ::dirs {} ; # We remove all the temp dirs at exit
 set ::run_matching {} ; # If non empty, only tests matching pattern are run.
@@ -57,7 +58,7 @@ proc exec_instance {type cfgfile} {
 # Spawn a redis or sentinel instance, depending on 'type'.
 proc spawn_instance {type base_port count {conf {}}} {
     for {set j 0} {$j < $count} {incr j} {
-        set port [find_available_port $base_port]
+        set port [find_available_port $base_port $::redis_port_count]
         incr base_port
         puts "Starting $type #$j at port $port"
 
diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl
index 2c2303141..5a9a46d16 100644
--- a/tests/integration/psync2-pingoff.tcl
+++ b/tests/integration/psync2-pingoff.tcl
@@ -1,6 +1,9 @@
-# Test the meaningful offset implementation to make sure masters
-# are able to PSYNC with replicas even if the replication stream
-# has pending PINGs at the end.
+# These tests were added together with the meaningful offset implementation
+# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that
+# servers are able to PSYNC with replicas even if the replication stream has
+# PINGs at the end which present in one sever and missing on another.
+# We keep these tests just because they reproduce edge cases in the replication
+# logic in hope they'll be able to spot some problem in the future.
 
 start_server {tags {"psync2"}} {
 start_server {} {
@@ -16,7 +19,7 @@ start_server {} {
     }
 
     # Setup replication
-    test "PSYNC2 meaningful offset: setup" {
+    test "PSYNC2 pingoff: setup" {
         $R(1) replicaof $R_host(0) $R_port(0)
         $R(0) set foo bar
         wait_for_condition 50 1000 {
@@ -27,7 +30,7 @@ start_server {} {
         }
     }
 
-    test "PSYNC2 meaningful offset: write and wait replication" {
+    test "PSYNC2 pingoff: write and wait replication" {
         $R(0) INCR counter
         $R(0) INCR counter
         $R(0) INCR counter
@@ -41,7 +44,7 @@ start_server {} {
     # In this test we'll make sure the replica will get stuck, but with
     # an active connection: this way the master will continue to send PINGs
     # every second (we modified the PING period earlier)
-    test "PSYNC2 meaningful offset: pause replica and promote it" {
+    test "PSYNC2 pingoff: pause replica and promote it" {
         $R(1) MULTI
         $R(1) DEBUG SLEEP 5
         $R(1) SLAVEOF NO ONE
@@ -50,14 +53,22 @@ start_server {} {
     }
 
     test "Make the old master a replica of the new one and check conditions" {
-        set sync_partial [status $R(1) sync_partial_ok]
-        assert {$sync_partial == 0}
+        assert_equal [status $R(1) sync_full] 0
         $R(0) REPLICAOF $R_host(1) $R_port(1)
         wait_for_condition 50 1000 {
-            [status $R(1) sync_partial_ok] == 1
+            [status $R(1) sync_full] == 1
         } else {
-            fail "The new master was not able to partial sync"
+            fail "The new master was not able to sync"
         }
+
+        # make sure replication is still alive and kicking
+        $R(1) incr x
+        wait_for_condition 50 1000 {
+            [$R(0) get x] == 1
+        } else {
+            fail "replica didn't get incr"
+        }
+        assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
     }
 }}
 
@@ -65,6 +76,106 @@ start_server {} {
 start_server {tags {"psync2"}} {
 start_server {} {
 start_server {} {
+start_server {} {
+start_server {} {
+    test {test various edge cases of repl topology changes with missing pings at the end} {
+        set master [srv -4 client]
+        set master_host [srv -4 host]
+        set master_port [srv -4 port]
+        set replica1 [srv -3 client]
+        set replica2 [srv -2 client]
+        set replica3 [srv -1 client]
+        set replica4 [srv -0 client]
+
+        $replica1 replicaof $master_host $master_port
+        $replica2 replicaof $master_host $master_port
+        $replica3 replicaof $master_host $master_port
+        $replica4 replicaof $master_host $master_port
+        wait_for_condition 50 1000 {
+            [status $master connected_slaves] == 4
+        } else {
+            fail "replicas didn't connect"
+        }
+
+        $master incr x
+        wait_for_condition 50 1000 {
+            [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
+            [$replica3 get x] == 1 && [$replica4 get x] == 1
+        } else {
+            fail "replicas didn't get incr"
+        }
+
+        # disconnect replica1 and replica2
+        # and wait for the master to send a ping to replica3 and replica4
+        $replica1 replicaof no one
+        $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
+        $master config set repl-ping-replica-period 1
+        after 1500
+
+        # make everyone sync from the replica1 that didn't get the last ping from the old master
+        # replica4 will keep syncing from the old master which now syncs from replica1
+        # and replica2 will re-connect to the old master (which went back in time)
+        set new_master_host [srv -3 host]
+        set new_master_port [srv -3 port]
+        $replica3 replicaof $new_master_host $new_master_port
+        $master replicaof $new_master_host $new_master_port
+        $replica2 replicaof $master_host $master_port
+        wait_for_condition 50 1000 {
+            [status $replica2 master_link_status] == "up" &&
+            [status $replica3 master_link_status] == "up" &&
+            [status $replica4 master_link_status] == "up" &&
+            [status $master master_link_status] == "up"
+        } else {
+            fail "replicas didn't connect"
+        }
+
+        # make sure replication is still alive and kicking
+        $replica1 incr x
+        wait_for_condition 50 1000 {
+            [$replica2 get x] == 2 &&
+            [$replica3 get x] == 2 &&
+            [$replica4 get x] == 2 &&
+            [$master get x] == 2
+        } else {
+            fail "replicas didn't get incr"
+        }
+
+        # make sure we have the right amount of full syncs
+        assert_equal [status $master sync_full] 6
+        assert_equal [status $replica1 sync_full] 2
+        assert_equal [status $replica2 sync_full] 0
+        assert_equal [status $replica3 sync_full] 0
+        assert_equal [status $replica4 sync_full] 0
+
+        # force psync
+        $master client kill type master
+        $replica2 client kill type master
+        $replica3 client kill type master
+        $replica4 client kill type master
+
+        # make sure replication is still alive and kicking
+        $replica1 incr x
+        wait_for_condition 50 1000 {
+            [$replica2 get x] == 3 &&
+            [$replica3 get x] == 3 &&
+            [$replica4 get x] == 3 &&
+            [$master get x] == 3
+        } else {
+            fail "replicas didn't get incr"
+        }
+
+        # make sure we have the right amount of full syncs
+        assert_equal [status $master sync_full] 6
+        assert_equal [status $replica1 sync_full] 2
+        assert_equal [status $replica2 sync_full] 0
+        assert_equal [status $replica3 sync_full] 0
+        assert_equal [status $replica4 sync_full] 0
+}
+}}}}}
+
+start_server {tags {"psync2"}} {
+start_server {} {
+start_server {} {
 
     for {set j 0} {$j < 3} {incr j} {
         set R($j) [srv [expr 0-$j] client]
@@ -97,7 +208,7 @@ start_server {} {
             [status $R(1) master_link_status] == "up" && 
             [status $R(2) master_link_status] == "up" &&
             [status $R(0) sync_partial_ok] == $sync_partial_master + 1 &&
-            [status $R(1) sync_partial_ok] == $sync_partial_replica + 1
+            [status $R(1) sync_partial_ok] == $sync_partial_replica
         } else {
             fail "Disconnected replica failed to PSYNC with master"
         }
@@ -106,7 +217,15 @@ start_server {} {
         # offsets match with the master
         assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
         assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
-        assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset]
-        assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset]
+
+        # make sure replication is still alive and kicking
+        $R(0) incr counter2
+        wait_for_condition 50 1000 {
+            [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2
+        } else {
+            fail "replicas didn't get incr"
+        }
+        assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset]
+        assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset]
     }
 }}}
diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl
index 29a880f99..3f636463a 100644
--- a/tests/integration/psync2.tcl
+++ b/tests/integration/psync2.tcl
@@ -242,7 +242,6 @@ start_server {} {
             show_cluster_status
             fail "Replicas and master offsets were unable to match *exactly*."
         }
-        $R($master_id) config set repl-ping-replica-period 10
 
         # Limit anyway the maximum number of cycles. This is useful when the
         # test is skipped via --only option of the test suite. In that case
@@ -370,103 +369,3 @@ start_server {} {
     }
 
 }}}}}
-
-start_server {tags {"psync2"}} {
-start_server {} {
-start_server {} {
-start_server {} {
-start_server {} {
-    test {pings at the end of replication stream are ignored for psync} {
-        set master [srv -4 client]
-        set master_host [srv -4 host]
-        set master_port [srv -4 port]
-        set replica1 [srv -3 client]
-        set replica2 [srv -2 client]
-        set replica3 [srv -1 client]
-        set replica4 [srv -0 client]
-
-        $replica1 replicaof $master_host $master_port
-        $replica2 replicaof $master_host $master_port
-        $replica3 replicaof $master_host $master_port
-        $replica4 replicaof $master_host $master_port
-        wait_for_condition 50 1000 {
-            [status $master connected_slaves] == 4
-        } else {
-            fail "replicas didn't connect"
-        }
-
-        $master incr x
-        wait_for_condition 50 1000 {
-            [$replica1 get x] == 1 && [$replica2 get x] == 1 &&
-            [$replica3 get x] == 1 && [$replica4 get x] == 1
-        } else {
-            fail "replicas didn't get incr"
-        }
-
-        # disconnect replica1 and replica2
-        # and wait for the master to send a ping to replica3 and replica4
-        $replica1 replicaof no one
-        $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id
-        $master config set repl-ping-replica-period 1
-        after 1500
-
-        # make everyone sync from the replica1 that didn't get the last ping from the old master
-        # replica4 will keep syncing from the old master which now syncs from replica1
-        # and replica2 will re-connect to the old master (which went back in time)
-        set new_master_host [srv -3 host]
-        set new_master_port [srv -3 port]
-        $replica3 replicaof $new_master_host $new_master_port
-        $master replicaof $new_master_host $new_master_port
-        $replica2 replicaof $master_host $master_port
-        wait_for_condition 50 1000 {
-            [status $replica2 master_link_status] == "up" &&
-            [status $replica3 master_link_status] == "up" &&
-            [status $replica4 master_link_status] == "up" &&
-            [status $master master_link_status] == "up"
-        } else {
-            fail "replicas didn't connect"
-        }
-
-        # make sure replication is still alive and kicking
-        $replica1 incr x
-        wait_for_condition 50 1000 {
-            [$replica2 get x] == 2 &&
-            [$replica3 get x] == 2 &&
-            [$replica4 get x] == 2 &&
-            [$master get x] == 2
-        } else {
-            fail "replicas didn't get incr"
-        }
-
-        # make sure there are full syncs other than the initial ones
-        assert_equal [status $master sync_full] 4
-        assert_equal [status $replica1 sync_full] 0
-        assert_equal [status $replica2 sync_full] 0
-        assert_equal [status $replica3 sync_full] 0
-        assert_equal [status $replica4 sync_full] 0
-
-        # force psync
-        $master client kill type master
-        $replica2 client kill type master
-        $replica3 client kill type master
-        $replica4 client kill type master
-
-        # make sure replication is still alive and kicking
-        $replica1 incr x
-        wait_for_condition 50 1000 {
-            [$replica2 get x] == 3 &&
-            [$replica3 get x] == 3 &&
-            [$replica4 get x] == 3 &&
-            [$master get x] == 3
-        } else {
-            fail "replicas didn't get incr"
-        }
-
-        # make sure there are full syncs other than the initial ones
-        assert_equal [status $master sync_full] 4
-        assert_equal [status $replica1 sync_full] 0
-        assert_equal [status $replica2 sync_full] 0
-        assert_equal [status $replica3 sync_full] 0
-        assert_equal [status $replica4 sync_full] 0
-}
-}}}}}
diff --git a/tests/modules/Makefile b/tests/modules/Makefile
index 363231a87..39b8e6efa 100644
--- a/tests/modules/Makefile
+++ b/tests/modules/Makefile
@@ -28,11 +28,14 @@ TEST_MODULES = \
 
 all: $(TEST_MODULES)
 
+32bit:
+	$(MAKE) CFLAGS="-m32" LDFLAGS="-melf_i386"
+
 %.xo: %.c ../../src/redismodule.h
 	$(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@
 
 %.so: %.xo
-	$(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc
+	$(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LDFLAGS) $(LIBS) -lc
 
 .PHONY: clean
 
diff --git a/tests/support/server.tcl b/tests/support/server.tcl
index d086366dc..146ebc72c 100644
--- a/tests/support/server.tcl
+++ b/tests/support/server.tcl
@@ -214,14 +214,14 @@ proc start_server {options {code undefined}} {
     dict set config dir [tmpdir server]
 
     # start every server on a different port
-    set ::port [find_available_port [expr {$::port+1}]]
+    set port [find_available_port $::baseport $::portcount]
     if {$::tls} {
         dict set config "port" 0
-        dict set config "tls-port" $::port
+        dict set config "tls-port" $port
         dict set config "tls-cluster" "yes"
         dict set config "tls-replication" "yes"
     } else {
-        dict set config port $::port
+        dict set config port $port
     }
 
     set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]]
@@ -243,10 +243,10 @@ proc start_server {options {code undefined}} {
     set server_started 0
     while {$server_started == 0} {
         if {$::verbose} {
-            puts -nonewline "=== ($tags) Starting server ${::host}:${::port} "
+            puts -nonewline "=== ($tags) Starting server ${::host}:${port} "
         }
 
-        send_data_packet $::test_server_fd "server-spawning" "port $::port"
+        send_data_packet $::test_server_fd "server-spawning" "port $port"
 
         if {$::valgrind} {
             set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &]
@@ -291,19 +291,19 @@ proc start_server {options {code undefined}} {
         # for availability. Other test clients may grab the port before we
         # are able to do it for example.
         if {$port_busy} {
-            puts "Port $::port was already busy, trying another port..."
-            set ::port [find_available_port [expr {$::port+1}]]
+            puts "Port $port was already busy, trying another port..."
+            set port [find_available_port $::baseport $::portcount]
             if {$::tls} {
-                dict set config "tls-port" $::port
+                dict set config "tls-port" $port
             } else {
-                dict set config port $::port
+                dict set config port $port
             }
             create_server_config_file $config_file $config
             continue; # Try again
         }
 
         if {$code ne "undefined"} {
-            set serverisup [server_is_up $::host $::port $retrynum]
+            set serverisup [server_is_up $::host $port $retrynum]
         } else {
             set serverisup 1
         }
@@ -324,7 +324,6 @@ proc start_server {options {code undefined}} {
     # setup properties to be able to initialize a client object
     set port_param [expr $::tls ? {"tls-port"} : {"port"}]
     set host $::host
-    set port $::port
     if {[dict exists $config bind]} { set host [dict get $config bind] }
     if {[dict exists $config $port_param]} { set port [dict get $config $port_param] }
 
diff --git a/tests/support/util.tcl b/tests/support/util.tcl
index 7ecf5b79c..8bec95374 100644
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@@ -344,21 +344,26 @@ proc roundFloat f {
     format "%.10g" $f
 }
 
-proc find_available_port start {
-    for {set j $start} {$j < $start+1024} {incr j} {
-        if {[catch {set fd1 [socket 127.0.0.1 $j]}] &&
-            [catch {set fd2 [socket 127.0.0.1 [expr $j+10000]]}]} {
-            return $j
+set ::last_port_attempted 0
+proc find_available_port {start count} {
+    set port [expr $::last_port_attempted + 1]
+    for {set attempts 0} {$attempts < $count} {incr attempts} {
+        if {$port < $start || $port >= $start+$count} {
+            set port $start
+        }
+        if {[catch {set fd1 [socket 127.0.0.1 $port]}] &&
+            [catch {set fd2 [socket 127.0.0.1 [expr $port+10000]]}]} {
+            set ::last_port_attempted $port
+            return $port
         } else {
             catch {
                 close $fd1
                 close $fd2
             }
         }
+        incr port
     }
-    if {$j == $start+1024} {
-        error "Can't find a non busy port in the $start-[expr {$start+1023}] range."
-    }
+    error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range."
 }
 
 # Test if TERM looks like to support colors
diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl
index de0a64728..fba54acb5 100644
--- a/tests/test_helper.tcl
+++ b/tests/test_helper.tcl
@@ -65,12 +65,15 @@ set ::all_tests {
     unit/wait
     unit/pendingquerybuf
     unit/tls
+    unit/tracking
 }
 # Index to the next test to run in the ::all_tests list.
 set ::next_test 0
 
 set ::host 127.0.0.1
-set ::port 21111
+set ::port 6379; # port for external server
+set ::baseport 21111; # initial port for spawned redis servers
+set ::portcount 8000; # we don't wanna use more than 10000 to avoid collision with cluster bus ports
 set ::traceleaks 0
 set ::valgrind 0
 set ::tls 0
@@ -228,26 +231,26 @@ proc test_server_main {} {
     set tclsh [info nameofexecutable]
     # Open a listening socket, trying different ports in order to find a
     # non busy one.
-    set port [find_available_port 11111]
+    set clientport [find_available_port 11111 32]
     if {!$::quiet} {
-        puts "Starting test server at port $port"
+        puts "Starting test server at port $clientport"
     }
-    socket -server accept_test_clients  -myaddr 127.0.0.1 $port
+    socket -server accept_test_clients  -myaddr 127.0.0.1 $clientport
 
     # Start the client instances
     set ::clients_pids {}
     if {$::external} {
         set p [exec $tclsh [info script] {*}$::argv \
-            --client $port --port $::port &]
+            --client $clientport &]
         lappend ::clients_pids $p
     } else {
-        set start_port [expr {$::port+100}]
+        set start_port $::baseport
+        set port_count [expr {$::portcount / $::numclients}]
         for {set j 0} {$j < $::numclients} {incr j} {
-            set start_port [find_available_port $start_port]
             set p [exec $tclsh [info script] {*}$::argv \
-                --client $port --port $start_port &]
+                --client $clientport --baseport $start_port --portcount $port_count &]
             lappend ::clients_pids $p
-            incr start_port 10
+            incr start_port $port_count
         }
     }
 
@@ -510,6 +513,10 @@ proc print_help_screen {} {
         "--loop             Execute the specified set of tests forever."
         "--wait-server      Wait after server is started (so that you can attach a debugger)."
         "--tls              Run tests in TLS mode."
+        "--host <addr>      Run tests against an external host."
+        "--port <port>      TCP port to use against external host."
+        "--baseport <port>  Initial port number for spawned redis servers."
+        "--portcount <num>  Port range for spawned redis servers."
         "--help             Print this help screen."
     } "\n"]
 }
@@ -560,6 +567,12 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
     } elseif {$opt eq {--port}} {
         set ::port $arg
         incr j
+    } elseif {$opt eq {--baseport}} {
+        set ::baseport $arg
+        incr j
+    } elseif {$opt eq {--portcount}} {
+        set ::portcount $arg
+        incr j
     } elseif {$opt eq {--accurate}} {
         set ::accurate 1
     } elseif {$opt eq {--force-failure}} {
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index 7720c055a..20a32e795 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -167,9 +167,9 @@ start_server {tags {"other"}} {
     tags {protocol} {
         test {PIPELINING stresser (also a regression for the old epoll bug)} {
             if {$::tls} {
-                set fd2 [::tls::socket $::host $::port]
+                set fd2 [::tls::socket [srv host] [srv port]]
             } else {
-                set fd2 [socket $::host $::port]
+                set fd2 [socket [srv host] [srv port]]
             }
             fconfigure $fd2 -encoding binary -translation binary
             puts -nonewline $fd2 "SELECT 9\r\n"