author     William Schultz <william.schultz@mongodb.com>     2020-07-09 11:03:09 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-07-09 17:26:40 +0000
commit     a6cd89a6c4d39b4b21376b109ad335e2fda8fb5d (patch)
tree       a5ed05e410a91fec5f2b1801c0790a358132e424
parent     2863db2f5418e7a204f0d35993f083f6b59639bc (diff)
download   mongo-a6cd89a6c4d39b4b21376b109ad335e2fda8fb5d.tar.gz
SERVER-49167 Set the stable timestamp without using the stable optime candidates when enableMajorityReadConcern:false
-rw-r--r--  jstests/replsets/rollback_crash_before_reaching_minvalid.js  116
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp             26
2 files changed, 131 insertions(+), 11 deletions(-)
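
Before the file-by-file diff, here is a minimal, self-contained C++ sketch of the selection rule the commit title describes. The OpTime struct and function name below are simplified stand-ins for illustration only, not the server's real types; the authoritative change is the _recalculateStableOpTime hunk at the end of this page. After this commit the stable optime is capped at both the no-overlap point and the maximum stable optime in every configuration, instead of being re-chosen from the stable optime candidate set when enableMajorityReadConcern:false.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Simplified stand-in for repl::OpTime: a timestamp plus an election term.
struct OpTime {
    std::uint64_t ts;
    long long term;
    bool operator<(const OpTime& rhs) const {
        return ts < rhs.ts || (ts == rhs.ts && term < rhs.term);
    }
};

// With this change the stable optime is simply the smaller of the two limits,
// regardless of the enableMajorityReadConcern setting.
OpTime recalculateStableOpTime(const OpTime& noOverlap, const OpTime& maximumStableOpTime) {
    return std::min(noOverlap, maximumStableOpTime);
}

int main() {
    OpTime noOverlap{100, 1};
    OpTime maximumStable{120, 1};
    OpTime stable = recalculateStableOpTime(noOverlap, maximumStable);
    std::cout << "stable timestamp: " << stable.ts << std::endl;  // prints 100
    return 0;
}
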
diff --git a/jstests/replsets/rollback_crash_before_reaching_minvalid.js b/jstests/replsets/rollback_crash_before_reaching_minvalid.js
new file mode 100644
index 00000000000..87bcaa5fc7b
--- /dev/null
+++ b/jstests/replsets/rollback_crash_before_reaching_minvalid.js
@@ -0,0 +1,116 @@
+/**
+ * Test that a node does not take a stable checkpoint at a timestamp earlier than minValid after
+ * crashing post rollbackViaRefetch. This test exercises that behavior when run with
+ * enableMajorityReadConcern:false.
+ *
+ * @tags: [requires_persistence]
+ */
+(function() {
+"use strict";
+
+load("jstests/replsets/libs/rollback_test.js");
+load("jstests/libs/fail_point_util.js");
+
+TestData.rollbackShutdowns = true;
+let dbName = "test";
+let sourceCollName = "coll";
+
+let doc1 = {_id: 1, x: "document_of_interest"};
+
+let CommonOps = (node) => {
+ // Insert a document that will exist on all nodes.
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].insert(doc1));
+};
+
+let SyncSourceOps = (node) => {
+ // Insert some documents on the sync source so the rollback node will have a minValid it needs
+ // to catch up to.
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 1, sync_source: 1}));
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 2, sync_source: 1}));
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 3, sync_source: 1}));
+};
+
+let RollbackOps = (node) => {
+ // Delete the document on the rollback node so it will be refetched from sync source.
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].remove(doc1));
+};
+
+const replTest = new ReplSetTest({nodes: 3, useBridge: true});
+replTest.startSet();
+// Speed up the test.
+replTest.nodes.forEach(node => {
+ assert.commandWorked(
+ node.adminCommand({configureFailPoint: 'setSmallOplogGetMoreMaxTimeMS', mode: 'alwaysOn'}));
+});
+let config = replTest.getReplSetConfig();
+config.members[2].priority = 0;
+config.settings = {
+ chainingAllowed: false
+};
+replTest.initiateWithHighElectionTimeout(config);
+let rollbackTest = new RollbackTest("rollback_crash_before_reaching_minvalid", replTest);
+CommonOps(rollbackTest.getPrimary());
+
+let rollbackNode = rollbackTest.transitionToRollbackOperations();
+
+// Have the node hang after rollback has completed but before it starts applying ops again.
+assert.commandWorked(rollbackNode.adminCommand(
+    {configureFailPoint: 'bgSyncHangAfterRunRollback', mode: 'alwaysOn'}));
+RollbackOps(rollbackNode);
+
+let node = rollbackTest.transitionToSyncSourceOperationsBeforeRollback();
+SyncSourceOps(node);
+
+// Let the rollback run.
+rollbackTest.transitionToSyncSourceOperationsDuringRollback();
+
+jsTestLog("Waiting for the rollback node to hit the failpoint.");
+checkLog.contains(rollbackNode, "bgSyncHangAfterRunRollback failpoint is set");
+
+// Kill the rollback node before it has reached minValid. Sending a shutdown signal to the node
+// should cause us to break out of the hung failpoint, so we don't need to explicitly turn the
+// failpoint off.
+jsTestLog("Killing the rollback node.");
+replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
+replTest.start(
+ 0,
+ {
+ setParameter: {
+ // Pause oplog fetching so the node doesn't advance past minValid after restart.
+ "failpoint.stopReplProducer": "{'mode':'alwaysOn'}"
+ }
+ },
+ true /* restart */);
+
+// Wait long enough for the initial stable checkpoint to be triggered if it was going to be. We
+// expect that no stable checkpoints are taken. If they are, we expect the test to fail when we
+// restart below and recover from a stable checkpoint.
+//
+// First we wait until the node has a commit point, since learning of one should trigger an update
+// to the stable timestamp. Then, we wait for a bit after this for any potential checkpoint to
+// occur. In the worst case, if the checkpoint was very slow to complete, we might produce a false
+// negative test result (the test would pass even though a bug existed), but we consider this
+// acceptable if it happens rarely.
+assert.soonNoExcept(() => {
+    let status = replTest.nodes[0].adminCommand({replSetGetStatus: 1});
+    // Compare timestamps by value; '!==' on two Timestamp objects only compares references
+    // and would always return true.
+    return bsonWoCompare({ts: status.optimes.lastCommittedOpTime.ts}, {ts: Timestamp(0, 0)}) !== 0;
+});
+sleep(5000);
+
+// Kill and restart the node to test that we don't recover from an inconsistent stable checkpoint
+// taken above.
+replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true});
+replTest.start(
+ 0,
+ {
+ setParameter: {
+ // Make sure this failpoint is not still enabled in the saved startup options.
+ "failpoint.stopReplProducer": "{'mode':'off'}"
+ }
+ },
+ true /* restart */);
+
+rollbackTest.transitionToSteadyStateOperations();
+
+// Check the replica set.
+rollbackTest.stop();
+}());
\ No newline at end of file
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ea39a6edcba..85f26da6e00 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -686,6 +686,19 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
OpTime minValid = _replicationProcess->getConsistencyMarkers()->getMinValid(opCtx.get());
consistency =
(lastOpTime >= minValid) ? DataConsistency::Consistent : DataConsistency::Inconsistent;
+
+ // It is not safe to take stable checkpoints until we reach minValid, so we set our
+ // initialDataTimestamp to prevent this. It is expected that this is only necessary when
+ // enableMajorityReadConcern:false.
+ if (lastOpTime < minValid) {
+ LOGV2_DEBUG(4916700,
+ 2,
+ "Setting initialDataTimestamp to minValid since our last optime is less "
+ "than minValid",
+ "lastOpTime"_attr = lastOpTime,
+ "minValid"_attr = minValid);
+ _storage->setInitialDataTimestamp(getServiceContext(), minValid.getTimestamp());
+ }
}
// Update the global timestamp before setting the last applied opTime forward so the last
@@ -4982,23 +4995,14 @@ boost::optional<OpTimeAndWallTime> ReplicationCoordinatorImpl::_recalculateStabl
// Make sure the stable optime does not surpass its maximum.
stableOpTime = OpTimeAndWallTime(std::min(noOverlap, maximumStableOpTime.opTime), Date_t());
- // Keep EMRC=false behavior the same for now.
- // TODO (SERVER-47844) Don't use stable optime candidates here.
- if (!serverGlobalParams.enableMajorityReadConcern) {
- stableOpTime =
- _chooseStableOpTimeFromCandidates(lk, _stableOpTimeCandidates, maximumStableOpTime);
- }
-
if (stableOpTime) {
// Check that the selected stable optime does not exceed our maximum and that it does not
// surpass the no-overlap point.
invariant(stableOpTime.get().opTime.getTimestamp() <=
maximumStableOpTime.opTime.getTimestamp());
invariant(stableOpTime.get().opTime <= maximumStableOpTime.opTime);
- if (serverGlobalParams.enableMajorityReadConcern) {
- invariant(stableOpTime.get().opTime.getTimestamp() <= noOverlap.getTimestamp());
- invariant(stableOpTime.get().opTime <= noOverlap);
- }
+ invariant(stableOpTime.get().opTime.getTimestamp() <= noOverlap.getTimestamp());
+ invariant(stableOpTime.get().opTime <= noOverlap);
}
return stableOpTime;