summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Judah Schvimer <judah@mongodb.com> 2018-04-20 09:54:04 -0400
committer: Judah Schvimer <judah@mongodb.com> 2018-04-20 09:54:04 -0400
commit: 5aec800d301a6806d82eac3a6bc5753b8c16dc5d (patch)
tree: 792f82f5c6ef0f039622817ffe48199e5ef41929
parent: 197390da1d7cfae131673debdbef53a29947f065 (diff)
download: mongo-5aec800d301a6806d82eac3a6bc5753b8c16dc5d.tar.gz
SERVER-33165 Don't return from ReplSetTest.initiate until there is a stable checkpoint
-rw-r--r-- buildscripts/resmokelib/testing/fixtures/replicaset.py | 58
-rw-r--r-- jstests/noPassthrough/auth_reject_mismatching_logical_times.js | 4
-rw-r--r-- jstests/noPassthrough/auto_retry_on_network_error.js | 4
-rw-r--r-- jstests/noPassthrough/read_majority.js | 4
-rw-r--r-- jstests/noPassthrough/read_majority_reads.js | 4
-rw-r--r-- jstests/noPassthrough/timestamp_index_builds.js | 7
-rw-r--r-- jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js | 4
-rw-r--r-- src/mongo/shell/replsettest.js | 135
8 files changed, 203 insertions, 17 deletions
diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py
index 9e0fe34d4db..9d4878298f4 100644
--- a/buildscripts/resmokelib/testing/fixtures/replicaset.py
+++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py
@@ -196,6 +196,7 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
"""Wait for replica set tpo be ready."""
self._await_primary()
self._await_secondaries()
+ self._await_stable_checkpoint()
def _await_primary(self):
# Wait for the primary to be elected.
@@ -230,6 +231,63 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst
time.sleep(0.1) # Wait a little bit before trying again.
self.logger.info("Secondary on port %d is now available.", secondary.port)
+ def _await_stable_checkpoint(self):
+ # Since this method is called at startup we expect the first node to be primary even when
+ # self.all_nodes_electable is True.
+ primary = self.nodes[0]
+ primary_client = primary.mongo_client()
+ if self.auth_options is not None:
+ auth_db = primary_client[self.auth_options["authenticationDatabase"]]
+ auth_db.authenticate(self.auth_options["username"],
+ password=self.auth_options["password"],
+ mechanism=self.auth_options["authenticationMechanism"])
+ # Algorithm precondition: All nodes must be in primary/secondary state.
+ #
+ # 1) Perform a majority write. This will guarantee the primary updates its commit point
+ # to the value of this write.
+ #
+ # 2) Perform a second write. This will guarantee that all nodes will update their commit
+ # point to a time that is >= the previous write. That will trigger a stable checkpoint
+ # on all nodes.
+ # TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to
+ # advance the commit point if the commit point being lagged is sufficient to choose a
+ # sync source.
+ admin = primary_client.get_database(
+ "admin", write_concern=pymongo.write_concern.WriteConcern(w="majority"))
+ admin.command("appendOplogNote", data={"await_stable_checkpoint": 1})
+ admin.command("appendOplogNote", data={"await_stable_checkpoint": 2})
+
+ for node in self.nodes:
+ self.logger.info("Waiting for node on port %d to have a stable checkpoint.", node.port)
+ client = node.mongo_client(read_preference=pymongo.ReadPreference.SECONDARY)
+ client_admin = client["admin"]
+ if self.auth_options is not None:
+ client_auth_db = client[self.auth_options["authenticationDatabase"]]
+ client_auth_db.authenticate(self.auth_options["username"],
+ password=self.auth_options["password"],
+ mechanism=self.auth_options["authenticationMechanism"])
+
+ while True:
+ status = client_admin.command("replSetGetStatus")
+ # The `lastStableCheckpointTimestamp` field contains the timestamp of a previous
+ # checkpoint taken at a stable timestamp. At startup recovery, this field
+ # contains the timestamp reflected in the data. After startup recovery, it may
+ # be lagged and there may be a stable checkpoint at a newer timestamp.
+ last_stable = status.get("lastStableCheckpointTimestamp", None)
+
+ # A missing `lastStableCheckpointTimestamp` field indicates that the storage
+ # engine does not support "recover to a stable timestamp".
+ if not last_stable:
+ break
+
+ # A null `lastStableCheckpointTimestamp` indicates that the storage engine supports
+ # "recover to a stable timestamp" but does not have a stable checkpoint yet.
+ if last_stable.time:
+ self.logger.info("Node on port %d now has a stable checkpoint. Time: %s",
+ node.port, last_stable)
+ break
+ time.sleep(0.1) # Wait a little bit before trying again.
+
def _do_teardown(self):
self.logger.info("Stopping all members of the replica set...")
diff --git a/jstests/noPassthrough/auth_reject_mismatching_logical_times.js b/jstests/noPassthrough/auth_reject_mismatching_logical_times.js
index 804251c63a2..0d2a368ad49 100644
--- a/jstests/noPassthrough/auth_reject_mismatching_logical_times.js
+++ b/jstests/noPassthrough/auth_reject_mismatching_logical_times.js
@@ -45,7 +45,9 @@
// Add shard with auth enabled.
const rst = new ReplSetTest({nodes: 2});
rst.startSet({keyFile: "jstests/libs/key1", shardsvr: ""});
- rst.initiate();
+
+ // TODO: Wait for stable checkpoint when SERVER-32672 is fixed.
+ rst.initiateWithAnyNodeAsPrimary(null, "replSetInitiate", {doNotWaitForStableCheckpoint: true});
assert.commandWorked(st.s.adminCommand({addShard: rst.getURL()}));
const testDB = st.s.getDB("test");
diff --git a/jstests/noPassthrough/auto_retry_on_network_error.js b/jstests/noPassthrough/auto_retry_on_network_error.js
index 64c5ec6ae1f..b9bc5d6685b 100644
--- a/jstests/noPassthrough/auto_retry_on_network_error.js
+++ b/jstests/noPassthrough/auto_retry_on_network_error.js
@@ -43,7 +43,9 @@
const rst = new ReplSetTest({nodes: 1});
rst.startSet();
- rst.initiate();
+
+ // awaitLastStableCheckpointTimestamp runs an 'appendOplogNote' command which is not retryable.
+ rst.initiateWithAnyNodeAsPrimary(null, "replSetInitiate", {doNotWaitForStableCheckpoint: true});
const dbName = "test";
const collName = "auto_retry";
diff --git a/jstests/noPassthrough/read_majority.js b/jstests/noPassthrough/read_majority.js
index 2cdf629927a..d4fbb75c367 100644
--- a/jstests/noPassthrough/read_majority.js
+++ b/jstests/noPassthrough/read_majority.js
@@ -40,7 +40,9 @@ load("jstests/libs/analyze_plan.js");
}
});
replTest.startSet();
- replTest.initiate();
+ // Cannot wait for a stable checkpoint with 'testingSnapshotBehaviorInIsolation' set.
+ replTest.initiateWithAnyNodeAsPrimary(
+ null, "replSetInitiate", {doNotWaitForStableCheckpoint: true});
const session =
replTest.getPrimary().getDB("test").getMongo().startSession({causalConsistency: false});
diff --git a/jstests/noPassthrough/read_majority_reads.js b/jstests/noPassthrough/read_majority_reads.js
index c8322a31c67..1f196856dd7 100644
--- a/jstests/noPassthrough/read_majority_reads.js
+++ b/jstests/noPassthrough/read_majority_reads.js
@@ -234,7 +234,9 @@
}
});
replTest.startSet();
- replTest.initiate();
+ // Cannot wait for a stable checkpoint with 'testingSnapshotBehaviorInIsolation' set.
+ replTest.initiateWithAnyNodeAsPrimary(
+ null, "replSetInitiate", {doNotWaitForStableCheckpoint: true});
var mongod = replTest.getPrimary();
diff --git a/jstests/noPassthrough/timestamp_index_builds.js b/jstests/noPassthrough/timestamp_index_builds.js
index 3ebda4f3691..b55b1805e00 100644
--- a/jstests/noPassthrough/timestamp_index_builds.js
+++ b/jstests/noPassthrough/timestamp_index_builds.js
@@ -41,12 +41,7 @@
let coll = getColl(rst.getPrimary());
- // TODO Can be removed with SERVER-33165.
- //
- // Create a collection and perform two majority writes. This guarantees both nodes will have a
- // stable timestamp.
- assert.commandWorked(
- coll.insert({}, {writeConcern: {w: "majority", wtimeout: rst.kDefaultTimeoutMS}}));
+ // Create a collection and wait for the stable timestamp to exceed its creation on both nodes.
assert.commandWorked(
coll.insert({}, {writeConcern: {w: "majority", wtimeout: rst.kDefaultTimeoutMS}}));
diff --git a/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js b/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js
index f8679f67d7f..83fe5283117 100644
--- a/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js
+++ b/jstests/noPassthrough/wt_delayed_secondary_read_concern_majority.js
@@ -45,7 +45,9 @@
conf.members[1].slaveDelay = 24 * 60 * 60;
rst.startSet();
- rst.initiateWithAnyNodeAsPrimary(conf);
+ // We cannot wait for a stable checkpoint due to the slaveDelay.
+ rst.initiateWithAnyNodeAsPrimary(
+ conf, "replSetInitiate", {doNotWaitForStableCheckpoint: true});
var master = rst.getPrimary(); // Waits for PRIMARY state.
// Reconfigure primary with a small cache size so less data needs to be
diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js
index 8f46b66c97d..7c200283189 100644
--- a/src/mongo/shell/replsettest.js
+++ b/src/mongo/shell/replsettest.js
@@ -142,9 +142,9 @@ var ReplSetTest = function(opts) {
return self.liveNodes.master || false;
}
- function asCluster(conn, fn) {
- if (self.keyFile) {
- return authutil.asCluster(conn, self.keyFile, fn);
+ function asCluster(conn, fn, keyFileParam = self.keyFile) {
+ if (keyFileParam) {
+ return authutil.asCluster(conn, keyFileParam, fn);
} else {
return fn();
}
@@ -649,7 +649,8 @@ var ReplSetTest = function(opts) {
var primary = expectedPrimaryNodeId;
for (var i = 0; i < nodes.length; i++) {
- var replSetGetStatus = nodes[i].getDB("admin").runCommand({replSetGetStatus: 1});
+ var replSetGetStatus =
+ assert.commandWorked(nodes[i].getDB("admin").runCommand({replSetGetStatus: 1}));
var nodesPrimary = -1;
for (var j = 0; j < replSetGetStatus.members.length; j++) {
if (replSetGetStatus.members[j].state === ReplSetTest.State.PRIMARY) {
@@ -841,7 +842,8 @@ var ReplSetTest = function(opts) {
* aren't authorized to run replSetGetStatus.
* TODO(SERVER-14017): remove this in favor of using initiate() everywhere.
*/
- this.initiateWithAnyNodeAsPrimary = function(cfg, initCmd) {
+ this.initiateWithAnyNodeAsPrimary = function(
+ cfg, initCmd, {doNotWaitForStableCheckpoint: doNotWaitForStableCheckpoint = false} = {}) {
var master = this.nodes[0].getDB("admin");
var config = cfg || this.getReplSetConfig();
var cmd = {};
@@ -926,7 +928,6 @@ var ReplSetTest = function(opts) {
master = this.getPrimary();
jsTest.authenticateNodes(this.nodes);
}
-
this.awaitSecondaryNodes();
let shouldWaitForKeys = true;
@@ -990,6 +991,9 @@ var ReplSetTest = function(opts) {
});
}
+ if (!doNotWaitForStableCheckpoint) {
+ self.awaitLastStableCheckpointTimestamp();
+ }
};
/**
@@ -1122,6 +1126,125 @@ var ReplSetTest = function(opts) {
return masterOpTime;
};
+ /**
+ * This function waits for all nodes in this replica set to take a stable checkpoint. In order
+ * to be able to roll back a node must have a stable timestamp. In order to be able to restart
+ * and not go into resync right after initial sync, a node must have a stable checkpoint. By
+ * waiting for all nodes to report having a stable checkpoint, we ensure that both of these
+ * conditions are met and that our tests can run as expected. Beyond simply waiting, this
+ * function does writes to ensure that a stable checkpoint will be taken.
+ */
+ this.awaitLastStableCheckpointTimestamp = function() {
+ let rst = this;
+ let master = rst.getPrimary();
+ let id = tojson(rst.nodeList());
+
+ // Algorithm precondition: All nodes must be in primary/secondary state.
+ //
+ // 1) Perform a majority write. This will guarantee the primary updates its commit point
+ // to the value of this write.
+ //
+ // 2) Perform a second write. This will guarantee that all nodes will update their commit
+ // point to a time that is >= the previous write. That will trigger a stable checkpoint
+ // on all nodes.
+ // TODO(SERVER-33248): Remove this block. We should not need to prod the replica set to
+ // advance the commit point if the commit point being lagged is sufficient to choose a
+ // sync source.
+ function advanceCommitPoint(master) {
+ // Shadow 'db' so that we can call 'advanceCommitPoint' directly on the primary node.
+ let db = master.getDB('admin');
+ const appendOplogNoteFn = function() {
+ assert.commandWorked(db.adminCommand({
+ "appendOplogNote": 1,
+ "data": {"awaitLastStableCheckpointTimestamp": 1},
+ "writeConcern": {"w": "majority", "wtimeout": ReplSetTest.kDefaultTimeoutMS}
+ }));
+ assert.commandWorked(db.adminCommand(
+ {"appendOplogNote": 1, "data": {"awaitLastStableCheckpointTimestamp": 2}}));
+ };
+
+ // TODO(SERVER-14017): Remove this extra sub-shell in favor of a cleaner authentication
+ // solution.
+ const masterId = "n" + rst.getNodeId(master);
+ const masterOptions = rst.nodeOptions[masterId] || {};
+ if (masterOptions.clusterAuthMode === "x509") {
+ print("AwaitLastStableCheckpointTimestamp: authenticating on separate shell " +
+ "with x509 for " + id);
+ const subShellArgs = [
+ 'mongo',
+ '--ssl',
+ '--sslCAFile=' + masterOptions.sslCAFile,
+ '--sslPEMKeyFile=' + masterOptions.sslPEMKeyFile,
+ '--sslAllowInvalidHostnames',
+ '--authenticationDatabase=$external',
+ '--authenticationMechanism=MONGODB-X509',
+ master.host,
+ '--eval',
+ `(${appendOplogNoteFn.toString()})();`
+ ];
+
+ const retVal = _runMongoProgram(...subShellArgs);
+ assert.eq(retVal, 0, 'mongo shell did not succeed with exit code 0');
+ } else {
+ if (masterOptions.clusterAuthMode) {
+ print("AwaitLastStableCheckpointTimestamp: authenticating with " +
+ masterOptions.clusterAuthMode + " for " + id);
+ }
+ asCluster(master, appendOplogNoteFn, masterOptions.keyFile);
+ }
+ }
+
+ print("AwaitLastStableCheckpointTimestamp: Beginning for " + id);
+
+ let replSetStatus = assert.commandWorked(master.adminCommand("replSetGetStatus"));
+ if (replSetStatus["configsvr"]) {
+ // Performing dummy replicated writes against a configsvr is hard, especially if auth
+ // is also enabled.
+ return;
+ }
+
+ rst.awaitNodesAgreeOnPrimary();
+ master = rst.getPrimary();
+
+ print("AwaitLastStableCheckpointTimestamp: ensuring the commit point advances for " + id);
+ advanceCommitPoint(master);
+
+ print("AwaitLastStableCheckpointTimestamp: Waiting for stable checkpoints for " + id);
+
+ assert.soonNoExcept(function() {
+ for (let node of rst.nodes) {
+ // The `lastStableCheckpointTimestamp` field contains the timestamp of a previous
+ // checkpoint taken at a stable timestamp. At startup recovery, this field
+ // contains the timestamp reflected in the data. After startup recovery, it may
+ // be lagged and there may be a stable checkpoint at a newer timestamp.
+ let res = assert.commandWorked(node.adminCommand({replSetGetStatus: 1}));
+
+ // Continue if we're connected to an arbiter.
+ if (res.myState === ReplSetTest.State.ARBITER) {
+ continue;
+ }
+
+ // A missing `lastStableCheckpointTimestamp` field indicates that the storage
+ // engine does not support `recover to a stable timestamp`.
+ if (!res.hasOwnProperty("lastStableCheckpointTimestamp")) {
+ continue;
+ }
+
+ // A null `lastStableCheckpointTimestamp` indicates that the storage engine supports
+ // "recover to a stable timestamp" but does not have a stable checkpoint yet.
+ if (res.lastStableCheckpointTimestamp.getTime() === 0) {
+ print("AwaitLastStableCheckpointTimestamp: " + node.host +
+ " does not have a stable checkpoint yet.");
+ return false;
+ }
+ }
+
+ return true;
+ }, "Not all members have a stable checkpoint");
+
+ print("AwaitLastStableCheckpointTimestamp: Successfully took stable checkpoints on " + id);
+ };
+
// Wait until the optime of the specified type reaches the primary's last applied optime.
this.awaitReplication = function(timeout, secondaryOpTimeType) {
timeout = timeout || self.kDefaultTimeoutMS;