diff options
8 files changed, 203 insertions, 17 deletions
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py index 9e0fe34d4db..9d4878298f4 100644 --- a/buildscripts/resmokelib/testing/fixtures/replicaset.py +++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py @@ -196,6 +196,7 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst """Wait for replica set tpo be ready.""" self._await_primary() self._await_secondaries() + self._await_stable_checkpoint() def _await_primary(self): # Wait for the primary to be elected. @@ -230,6 +231,63 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst time.sleep(0.1) # Wait a little bit before trying again. self.logger.info("Secondary on port %d is now available.", secondary.port) + def _await_stable_checkpoint(self): + # Since this method is called at startup we expect the first node to be primary even when + # self.all_nodes_electable is True. + primary = self.nodes[0] + primary_client = primary.mongo_client() + if self.auth_options is not None: + auth_db = primary_client[self.auth_options["authenticationDatabase"]] + auth_db.authenticate(self.auth_options["username"], + password=self.auth_options["password"], + mechanism=self.auth_options["authenticationMechanism"]) + # Algorithm precondition: All nodes must be in primary/secondary state. + # + # 1) Perform a majority write. This will guarantee the primary updates its commit point + # to the value of this write. + # + # 2) Perform a second write. This will guarantee that all nodes will update their commit + # point to a time that is >= the previous write. That will trigger a stable checkpoint + # on all nodes. + # TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to + # advance the commit point if the commit point being lagged is sufficient to choose a + # sync source. + admin = primary_client.get_database( + "admin", write_concern=pymongo.write_concern.WriteConcern(w="majority")) + admin.command("appendOplogNote", data={"await_stable_checkpoint": 1}) + admin.command("appendOplogNote", data={"await_stable_checkpoint": 2}) + + for node in self.nodes: + self.logger.info("Waiting for node on port %d to have a stable checkpoint.", node.port) + client = node.mongo_client(read_preference=pymongo.ReadPreference.SECONDARY) + client_admin = client["admin"] + if self.auth_options is not None: + client_auth_db = client[self.auth_options["authenticationDatabase"]] + client_auth_db.authenticate(self.auth_options["username"], + password=self.auth_options["password"], + mechanism=self.auth_options["authenticationMechanism"]) + + while True: + status = client_admin.command("replSetGetStatus") + # The `lastStableCheckpointTimestamp` field contains the timestamp of a previous + # checkpoint taken at a stable timestamp. At startup recovery, this field + # contains the timestamp reflected in the data. After startup recovery, it may + # be lagged and there may be a stable checkpoint at a newer timestamp. + last_stable = status.get("lastStableCheckpointTimestamp", None) + + # A missing `lastStableCheckpointTimestamp` field indicates that the storage + # engine does not support "recover to a stable timestamp". + if not last_stable: + break + + # A null `lastStableCheckpointTimestamp` indicates that the storage engine supports + # "recover to a stable timestamp" but does not have a stable checkpoint yet. + if last_stable.time: + self.logger.info("Node on port %d now has a stable checkpoint. Time: %s", + node.port, last_stable) + break + time.sleep(0.1) # Wait a little bit before trying again. + def _do_teardown(self): self.logger.info("Stopping all members of the replica set...") diff --git a/jstests/noPassthrough/auth_reject_mismatching_logical_times.js b/jstests/noPassthrough/auth_reject_mismatching_logical_times.js index 804251c63a2..0d2a368ad49 100644 --- a/jstests/noPassthrough/auth_reject_mismatching_logical_times.js +++ b/jstests/noPassthrough/auth_reject_mismatching_logical_times.js @@ -45,7 +45,9 @@ // Add shard with auth enabled. const rst = new ReplSetTest({nodes: 2}); rst.startSet({keyFile: "jstests/libs/key1", shardsvr: ""}); - rst.initiate(); + + // TODO: Wait for stable checkpoint when SERVER-32672 is fixed. + rst.initiateWithAnyNodeAsPrimary(null, "replSetInitiate", {doNotWaitForStableCheckpoint: true}); assert.commandWorked(st.s.adminCommand({addShard: rst.getURL()})); const testDB = st.s.getDB("test"); diff --git a/jstests/noPassthrough/auto_retry_on_network_error.js b/jstests/noPassthrough/auto_retry_on_network_error.js index 64c5ec6ae1f..b9bc5d6685b 100644 --- a/jstests/noPassthrough/auto_retry_on_network_error.js +++ b/jstests/noPassthrough/auto_retry_on_network_error.js @@ -43,7 +43,9 @@ const rst = new ReplSetTest({nodes: 1}); rst.startSet(); - rst.initiate(); + + // awaitLastStableCheckpointTimestamp runs an 'appendOplogNote' command which is not retryable. + rst.initiateWithAnyNodeAsPrimary(null, "replSetInitiate", {doNotWaitForStableCheckpoint: true}); const dbName = "test"; const collName = "auto_retry"; diff --git a/jstests/noPassthrough/read_majority.js b/jstests/noPassthrough/read_majority.js index 2cdf629927a..d4fbb75c367 100644 --- a/jstests/noPassthrough/read_majority.js +++ b/jstests/noPassthrough/read_majority.js @@ -40,7 +40,9 @@ load("jstests/libs/analyze_plan.js"); } }); replTest.startSet(); - replTest.initiate(); + // Cannot wait for a stable checkpoint with 'testingSnapshotBehaviorInIsolation' set. + replTest.initiateWithAnyNodeAsPrimary( + null, "replSetInitiate", {doNotWaitForStableCheckpoint: true}); const session = replTest.getPrimary().getDB("test").getMongo().startSession({causalConsistency: false}); diff --git a/jstests/noPassthrough/read_majority_reads.js b/jstests/noPassthrough/read_majority_reads.js index c8322a31c67..1f196856dd7 100644 --- a/jstests/noPassthrough/read_majority_reads.js +++ b/jstests/noPassthrough/read_majority_reads.js @@ -234,7 +234,9 @@ } }); replTest.startSet(); - replTest.initiate(); + // Cannot wait for a stable checkpoint with 'testingSnapshotBehaviorInIsolation' set. + replTest.initiateWithAnyNodeAsPrimary( + null, "replSetInitiate", {doNotWaitForStableCheckpoint: true}); var mongod = replTest.getPrimary(); diff --git a/jstests/noPassthrough/timestamp_index_builds.js b/jstests/noPassthrough/timestamp_index_builds.js index 3ebda4f3691..b55b1805e00 100644 --- a/jstests/noPassthrough/timestamp_index_builds.js +++ b/jstests/noPassthrough/timestamp_index_builds.js @@ -41,12 +41,7 @@ let coll = getColl(rst.getPrimary()); - // TODO Can be removed with SERVER-33165. - // - // Create a collection and perform two majority writes. This guarantees both nodes will have a - // stable timestamp. - assert.commandWorked( - coll.insert({}, {writeConcern: {w: "majority", wtimeout: rst.kDefaultTimeoutMS}})); + // Create a collection and wait for the stable timestamp to exceed its creation on both nodes. assert.commandWorked( coll.insert({}, {writeConcern: {w: "majority", wtimeout: rst.kDefaultTimeoutMS}})); diff --git a/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js b/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js index f8679f67d7f..83fe5283117 100644 --- a/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js +++ b/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js @@ -45,7 +45,9 @@ conf.members[1].slaveDelay = 24 * 60 * 60; rst.startSet(); - rst.initiateWithAnyNodeAsPrimary(conf); + // We cannot wait for a stable checkpoint due to the slaveDelay. + rst.initiateWithAnyNodeAsPrimary( + conf, "replSetInitiate", {doNotWaitForStableCheckpoint: true}); var master = rst.getPrimary(); // Waits for PRIMARY state. // Reconfigure primary with a small cache size so less data needs to be diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index 8f46b66c97d..7c200283189 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -142,9 +142,9 @@ var ReplSetTest = function(opts) { return self.liveNodes.master || false; } - function asCluster(conn, fn) { - if (self.keyFile) { - return authutil.asCluster(conn, self.keyFile, fn); + function asCluster(conn, fn, keyFileParam = self.keyFile) { + if (keyFileParam) { + return authutil.asCluster(conn, keyFileParam, fn); } else { return fn(); } @@ -649,7 +649,8 @@ var ReplSetTest = function(opts) { var primary = expectedPrimaryNodeId; for (var i = 0; i < nodes.length; i++) { - var replSetGetStatus = nodes[i].getDB("admin").runCommand({replSetGetStatus: 1}); + var replSetGetStatus = + assert.commandWorked(nodes[i].getDB("admin").runCommand({replSetGetStatus: 1})); var nodesPrimary = -1; for (var j = 0; j < replSetGetStatus.members.length; j++) { if (replSetGetStatus.members[j].state === ReplSetTest.State.PRIMARY) { @@ -841,7 +842,8 @@ var ReplSetTest = function(opts) { * aren't authorized to run replSetGetStatus. * TODO(SERVER-14017): remove this in favor of using initiate() everywhere. */ - this.initiateWithAnyNodeAsPrimary = function(cfg, initCmd) { + this.initiateWithAnyNodeAsPrimary = function( + cfg, initCmd, {doNotWaitForStableCheckpoint: doNotWaitForStableCheckpoint = false} = {}) { var master = this.nodes[0].getDB("admin"); var config = cfg || this.getReplSetConfig(); var cmd = {}; @@ -926,7 +928,6 @@ var ReplSetTest = function(opts) { master = this.getPrimary(); jsTest.authenticateNodes(this.nodes); } - this.awaitSecondaryNodes(); let shouldWaitForKeys = true; @@ -990,6 +991,9 @@ var ReplSetTest = function(opts) { }); } + if (!doNotWaitForStableCheckpoint) { + self.awaitLastStableCheckpointTimestamp(); + } }; /** @@ -1122,6 +1126,125 @@ var ReplSetTest = function(opts) { return masterOpTime; }; + /** + * This function waits for all nodes in this replica set to take a stable checkpoint. In order + * to be able to roll back a node must have a stable timestamp. In order to be able to restart + * and not go into resync right after initial sync, a node must have a stable checkpoint. By + * waiting for all nodes to report having a stable checkpoint, we ensure that both of these + * conditions are met and that our tests can run as expected. Beyond simply waiting, this + * function does writes to ensure that a stable checkpoint will be taken. + */ + this.awaitLastStableCheckpointTimestamp = function() { + let rst = this; + let master = rst.getPrimary(); + let id = tojson(rst.nodeList()); + + // Algorithm precondition: All nodes must be in primary/secondary state. + // + // 1) Perform a majority write. This will guarantee the primary updates its commit point + // to the value of this write. + // + // 2) Perform a second write. This will guarantee that all nodes will update their commit + // point to a time that is >= the previous write. That will trigger a stable checkpoint + // on all nodes. + // TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to + // advance the commit point if the commit point being lagged is sufficient to choose a + // sync source. + function advanceCommitPoint(master) { + // Shadow 'db' so that we can call 'advanceCommitPoint' directly on the primary node. + let db = master.getDB('admin'); + const appendOplogNoteFn = function() { + assert.commandWorked(db.adminCommand({ + "appendOplogNote": 1, + "data": {"awaitLastStableCheckpointTimestamp": 1}, + "writeConcern": {"w": "majority", "wtimeout": ReplSetTest.kDefaultTimeoutMS} + })); + assert.commandWorked(db.adminCommand( + {"appendOplogNote": 1, "data": {"awaitLastStableCheckpointTimestamp": 2}})); + }; + + // TODO(SERVER-14017): Remove this extra sub-shell in favor of a cleaner authentication + // solution. + const masterId = "n" + rst.getNodeId(master); + const masterOptions = rst.nodeOptions[masterId] || {}; + if (masterOptions.clusterAuthMode === "x509") { + print("AwaitLastStableCheckpointTimestamp: authenticating on separate shell " + + "with x509 for " + id); + const subShellArgs = [ + 'mongo', + '--ssl', + '--sslCAFile=' + masterOptions.sslCAFile, + '--sslPEMKeyFile=' + masterOptions.sslPEMKeyFile, + '--sslAllowInvalidHostnames', + '--authenticationDatabase=$external', + '--authenticationMechanism=MONGODB-X509', + master.host, + '--eval', + `(${appendOplogNoteFn.toString()})();` + ]; + + const retVal = _runMongoProgram(...subShellArgs); + assert.eq(retVal, 0, 'mongo shell did not succeed with exit code 0'); + } else { + if (masterOptions.clusterAuthMode) { + print("AwaitLastStableCheckpointTimestamp: authenticating with " + + masterOptions.clusterAuthMode + " for " + id); + } + asCluster(master, appendOplogNoteFn, masterOptions.keyFile); + } + } + + print("AwaitLastStableCheckpointTimestamp: Beginning for " + id); + + let replSetStatus = assert.commandWorked(master.adminCommand("replSetGetStatus")); + if (replSetStatus["configsvr"]) { + // Performing dummy replicated writes against a configsvr is hard, especially if auth + // is also enabled. + return; + } + + rst.awaitNodesAgreeOnPrimary(); + master = rst.getPrimary(); + + print("AwaitLastStableCheckpointTimestamp: ensuring the commit point advances for " + id); + advanceCommitPoint(master); + + print("AwaitLastStableCheckpointTimestamp: Waiting for stable checkpoints for " + id); + + assert.soonNoExcept(function() { + for (let node of rst.nodes) { + // The `lastStableCheckpointTimestamp` field contains the timestamp of a previous + // checkpoint taken at a stable timestamp. At startup recovery, this field + // contains the timestamp reflected in the data. After startup recovery, it may + // be lagged and there may be a stable checkpoint at a newer timestamp. + let res = assert.commandWorked(node.adminCommand({replSetGetStatus: 1})); + + // Continue if we're connected to an arbiter. + if (res.myState === ReplSetTest.State.ARBITER) { + continue; + } + + // A missing `lastStableCheckpointTimestamp` field indicates that the storage + // engine does not support `recover to a stable timestamp`. + if (!res.hasOwnProperty("lastStableCheckpointTimestamp")) { + continue; + } + + // A null `lastStableCheckpointTimestamp` indicates that the storage engine supports + // "recover to a stable timestamp" but does not have a stable checkpoint yet. + if (res.lastStableCheckpointTimestamp.getTime() === 0) { + print("AwaitLastStableCheckpointTimestamp: " + node.host + + " does not have a stable checkpoint yet."); + return false; + } + } + + return true; + }, "Not all members have a stable checkpoint"); + + print("AwaitLastStableCheckpointTimestamp: Successfully took stable checkpoints on " + id); + }; + // Wait until the optime of the specified type reaches the primary's last applied optime. this.awaitReplication = function(timeout, secondaryOpTimeType) { timeout = timeout || self.kDefaultTimeoutMS; |