From a6cd89a6c4d39b4b21376b109ad335e2fda8fb5d Mon Sep 17 00:00:00 2001 From: William Schultz Date: Thu, 9 Jul 2020 11:03:09 -0400 Subject: SERVER-49167 Set the stable timestamp without using the stable optime candidates when enableMajorityReadConcern:false --- .../rollback_crash_before_reaching_minvalid.js | 116 +++++++++++++++++++++ src/mongo/db/repl/replication_coordinator_impl.cpp | 26 +++-- 2 files changed, 131 insertions(+), 11 deletions(-) create mode 100644 jstests/replsets/rollback_crash_before_reaching_minvalid.js diff --git a/jstests/replsets/rollback_crash_before_reaching_minvalid.js b/jstests/replsets/rollback_crash_before_reaching_minvalid.js new file mode 100644 index 00000000000..87bcaa5fc7b --- /dev/null +++ b/jstests/replsets/rollback_crash_before_reaching_minvalid.js @@ -0,0 +1,116 @@ +/** + * Test that a node does not take a stable checkpoint at a timestamp earlier than minValid after + * crashing post rollbackViaRefetch. This test exercises that behavior when run with + * enableMajorityReadConcern:false. + * + * @tags: [requires_persistence] + */ +(function() { +"use strict"; + +load("jstests/replsets/libs/rollback_test.js"); +load("jstests/libs/fail_point_util.js"); + +TestData.rollbackShutdowns = true; +let dbName = "test"; +let sourceCollName = "coll"; + +let doc1 = {_id: 1, x: "document_of_interest"}; + +let CommonOps = (node) => { + // Insert a document that will exist on all nodes. + assert.commandWorked(node.getDB(dbName)[sourceCollName].insert(doc1)); +}; + +let SyncSourceOps = (node) => { + // Insert some documents on the sync source so the rollback node will have a minValid it needs + // to catch up to. 
+ assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 1, sync_source: 1})); + assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 2, sync_source: 1})); + assert.commandWorked(node.getDB(dbName)[sourceCollName].insert({x: 3, sync_source: 1})); +}; + +let RollbackOps = (node) => { + // Delete the document on the rollback node so it will be refetched from sync source. + assert.commandWorked(node.getDB(dbName)[sourceCollName].remove(doc1)); +}; + +const replTest = new ReplSetTest({nodes: 3, useBridge: true}); +replTest.startSet(); +// Speed up the test. +replTest.nodes.forEach(node => { + assert.commandWorked( + node.adminCommand({configureFailPoint: 'setSmallOplogGetMoreMaxTimeMS', mode: 'alwaysOn'})); +}); +let config = replTest.getReplSetConfig(); +config.members[2].priority = 0; +config.settings = { + chainingAllowed: false +}; +replTest.initiateWithHighElectionTimeout(config); +let rollbackTest = new RollbackTest("rollback_crash_before_reaching_minvalid", replTest); +CommonOps(rollbackTest.getPrimary()); + +let rollbackNode = rollbackTest.transitionToRollbackOperations(); + +// Have the node hang after rollback has completed but before it starts applying ops again. +rollbackNode.adminCommand({configureFailPoint: 'bgSyncHangAfterRunRollback', mode: 'alwaysOn'}); +RollbackOps(rollbackNode); + +let node = rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); +SyncSourceOps(node); + +// Let the rollback run. +rollbackTest.transitionToSyncSourceOperationsDuringRollback(); + +jsTestLog("Waiting for the rollback node to hit the failpoint."); +checkLog.contains(rollbackNode, "bgSyncHangAfterRunRollback failpoint is set"); + +// Kill the rollback node before it has reached minValid. Sending a shutdown signal to the node +// should cause us to break out of the hung failpoint, so we don't need to explicitly turn the +// failpoint off. 
+jsTestLog("Killing the rollback node."); +replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true}); +replTest.start( + 0, + { + setParameter: { + // Pause oplog fetching so the node doesn't advance past minValid after restart. + "failpoint.stopReplProducer": "{'mode':'alwaysOn'}" + } + }, + true /* restart */); + +// Wait long enough for the initial stable checkpoint to be triggered if it was going to be. We +// expect that no stable checkpoints are taken. If they are, we expect the test to fail when we +// restart below and recover from a stable checkpoint. +// +// First we wait until the node has a commit point, since learning of one should trigger an update +// to the stable timestamp. Then, we wait for a bit after this for any potential checkpoint to +// occur. In the worst case, if the checkpoint was very slow to complete, we might produce a false +// negative test result (the test would pass even though a bug existed), but we consider this +// acceptable if it happens rarely. NOTE(review): the `!==` check below compares against a newly constructed Timestamp(0, 0); if that is an object-identity comparison it is always true and this wait returns immediately - confirm before relying on it. +assert.soonNoExcept(() => { + let status = replTest.nodes[0].adminCommand({replSetGetStatus: 1}); + return status.optimes.lastCommittedOpTime.ts !== Timestamp(0, 0); +}); +sleep(5000); + +// Kill and restart the node to test that we don't recover from an inconsistent stable checkpoint +// taken above. +replTest.stop(0, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true}); +replTest.start( + 0, + { + setParameter: { + // Make sure this failpoint is not still enabled in the saved startup options. + "failpoint.stopReplProducer": "{'mode':'off'}" + } + }, + true /* restart */); + +rollbackTest.transitionToSteadyStateOperations(); + +// Check the replica set. 
+rollbackTest.stop(); +}()); \ No newline at end of file diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ea39a6edcba..85f26da6e00 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -686,6 +686,19 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig( OpTime minValid = _replicationProcess->getConsistencyMarkers()->getMinValid(opCtx.get()); consistency = (lastOpTime >= minValid) ? DataConsistency::Consistent : DataConsistency::Inconsistent; + + // It is not safe to take stable checkpoints until we reach minValid, so we set our + // initialDataTimestamp to prevent this. It is expected that this is only necessary when + // enableMajorityReadConcern:false. + if (lastOpTime < minValid) { + LOGV2_DEBUG(4916700, + 2, + "Setting initialDataTimestamp to minValid since our last optime is less " + "than minValid", + "lastOpTime"_attr = lastOpTime, + "minValid"_attr = minValid); + _storage->setInitialDataTimestamp(getServiceContext(), minValid.getTimestamp()); + } } // Update the global timestamp before setting the last applied opTime forward so the last @@ -4982,23 +4995,14 @@ boost::optional<OpTimeAndWallTime> ReplicationCoordinatorImpl::_recalculateStabl // Make sure the stable optime does not surpass its maximum. stableOpTime = OpTimeAndWallTime(std::min(noOverlap, maximumStableOpTime.opTime), Date_t()); - // Keep EMRC=false behavior the same for now. - // TODO (SERVER-47844) Don't use stable optime candidates here. - if (!serverGlobalParams.enableMajorityReadConcern) { - stableOpTime = - _chooseStableOpTimeFromCandidates(lk, _stableOpTimeCandidates, maximumStableOpTime); - } - if (stableOpTime) { // Check that the selected stable optime does not exceed our maximum and that it does not // surpass the no-overlap point. 
invariant(stableOpTime.get().opTime.getTimestamp() <= maximumStableOpTime.opTime.getTimestamp()); invariant(stableOpTime.get().opTime <= maximumStableOpTime.opTime); - if (serverGlobalParams.enableMajorityReadConcern) { - invariant(stableOpTime.get().opTime.getTimestamp() <= noOverlap.getTimestamp()); - invariant(stableOpTime.get().opTime <= noOverlap); - } + invariant(stableOpTime.get().opTime.getTimestamp() <= noOverlap.getTimestamp()); + invariant(stableOpTime.get().opTime <= noOverlap); } return stableOpTime; -- cgit v1.2.1