diff options
author | Judah Schvimer <judah.schvimer@10gen.com> | 2019-12-02 16:36:46 +0000 |
---|---|---|
committer | evergreen <evergreen@mongodb.com> | 2019-12-02 16:36:46 +0000 |
commit | d90cdf5eb5f01b93ba7fecc11001dbeb6b040bb8 (patch) | |
tree | ee57f01a8229c1b05a8121d038d0ef5c28b06c26 | |
parent | 98ff3036a2157f0fed25bca81e55684acb5b9bd7 (diff) | |
download | mongo-d90cdf5eb5f01b93ba7fecc11001dbeb6b040bb8.tar.gz |
SERVER-44612 recoverFromOplogAsStandalone with takeUnstableCheckpointOnShutdown should succeed if retried after a successful attempt
-rw-r--r-- | jstests/replsets/standalone_replication_recovery_idempotent.js | 164 | ||||
-rw-r--r-- | src/mongo/db/repl/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 25 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 87 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery.h | 15 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_mock.h | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_recovery_test.cpp | 142 | ||||
-rw-r--r-- | src/mongo/db/storage/storage_parameters.idl | 6 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_parameters.idl | 6 |
10 files changed, 419 insertions, 30 deletions
diff --git a/jstests/replsets/standalone_replication_recovery_idempotent.js b/jstests/replsets/standalone_replication_recovery_idempotent.js new file mode 100644 index 00000000000..9108ba2e2e9 --- /dev/null +++ b/jstests/replsets/standalone_replication_recovery_idempotent.js @@ -0,0 +1,164 @@ +/* + * Tests that a 'recoverFromOplogAsStandalone' with 'takeUnstableCheckpointOnShutdown' is + * idempotent. + * + * This test only makes sense for storage engines that support recover to stable timestamp. + * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication, + * requires_majority_read_concern, uses_transactions, uses_prepare_transaction] + */ + +(function() { +"use strict"; +load("jstests/replsets/rslib.js"); +load("jstests/libs/write_concern_util.js"); +load("jstests/core/txns/libs/prepare_helpers.js"); + +const name = jsTestName(); +const dbName = name; +const collName1 = 'srri_coll1'; +const collName2 = 'srri_coll2'; + +const logLevel = tojson({storage: {recovery: 2}}); + +const rst = new ReplSetTest({ + nodes: 2, +}); + +function getColl1(conn) { + return conn.getDB(dbName)[collName1]; +} + +function getColl2(conn) { + return conn.getDB(dbName)[collName2]; +} + +function assertDocsInColl1(node, nums) { + let results = getColl1(node).find().sort({_id: 1}).toArray(); + let expected = nums.map((i) => ({_id: i})); + if (!friendlyEqual(results, expected)) { + rst.dumpOplog(node, {}, 100); + } + assert.eq(results, expected, "actual (left) != expected (right)"); +} + +function assertPrepareConflictColl2(node, id) { + assert.sameMembers(getColl2(node).find().toArray(), [{_id: id}]); + assert.commandFailedWithCode( + node.getDB(dbName).runCommand( + {update: collName2, updates: [{q: {_id: id}, u: {$inc: {x: 1}}}], maxTimeMS: 1000}), + ErrorCodes.MaxTimeMSExpired); +} + +jsTestLog("Initiating as a replica set."); +let nodes = rst.startSet({setParameter: {logComponentVerbosity: logLevel}}); +let node = nodes[0]; +let secondary = nodes[1]; +rst.initiate( + {_id: name, members: [{_id: 0, host: node.host}, {_id: 1, host: secondary.host, priority: 0}]}); + +// Create two collections with w:majority and then perform a clean restart to ensure that +// the collections are in a stable checkpoint. +assert.commandWorked(getColl1(node).insert({_id: 3}, {writeConcern: {w: "majority"}})); +assert.commandWorked(getColl2(node).insert({_id: 1}, {writeConcern: {w: "majority"}})); + +node = rst.restart(node, {"noReplSet": false}); +reconnect(node); +assert.eq(rst.getPrimary(), node); +assertDocsInColl1(node, [3]); +assert.sameMembers(getColl2(node).find().toArray(), [{_id: 1}]); + +// Keep node 0 the primary, but prevent it from committing any writes. +stopServerReplication(secondary); +assert.commandWorked(getColl1(node).insert({_id: 4}, {writeConcern: {w: 1, j: 1}})); +assertDocsInColl1(node, [3, 4]); + +let session = node.startSession(); +const sessionDB = session.getDatabase(dbName); +const sessionColl2 = sessionDB.getCollection(collName2); +session.startTransaction(); +const txnNumber = session.getTxnNumber_forTesting(); +assert.commandWorked(sessionColl2.update({_id: 1}, {_id: 1, a: 1})); +let prepareTimestamp = PrepareHelpers.prepareTransaction(session, {w: 1, j: 1}); +jsTestLog("Prepared a transaction at " + prepareTimestamp); +assertPrepareConflictColl2(node, 1); + +jsTestLog("Test that on restart with just 'recoverFromOplogAsStandalone' set we play recovery."); +node = rst.restart(node, { + noReplSet: true, + setParameter: {recoverFromOplogAsStandalone: true, logComponentVerbosity: logLevel} +}); +reconnect(node); +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); +assert.commandFailedWithCode(getColl1(node).insert({_id: 7}), ErrorCodes.IllegalOperation); + +jsTestLog("Test that on restart with just 'recoverFromOplogAsStandalone' we succeed" + + " idempotently."); +node = rst.restart(node, { + noReplSet: true, + setParameter: {recoverFromOplogAsStandalone: true, logComponentVerbosity: logLevel} +}); +reconnect(node); +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); +assert.commandFailedWithCode(getColl1(node).insert({_id: 7}), ErrorCodes.IllegalOperation); + +jsTestLog("Test that on restart with both flags we succeed."); +node = rst.restart(node, { + noReplSet: true, + setParameter: { + recoverFromOplogAsStandalone: true, + takeUnstableCheckpointOnShutdown: true, + logComponentVerbosity: logLevel + } +}); +reconnect(node); +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); +assert.commandFailedWithCode(getColl1(node).insert({_id: 7}), ErrorCodes.IllegalOperation); + +jsTestLog("Test that on restart with both flags we succeed idempotently."); +node = rst.restart(node, { + noReplSet: true, + setParameter: { + recoverFromOplogAsStandalone: true, + takeUnstableCheckpointOnShutdown: true, + logComponentVerbosity: logLevel + } +}); +reconnect(node); +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); +assert.commandFailedWithCode(getColl1(node).insert({_id: 7}), ErrorCodes.IllegalOperation); + +jsTestLog("Restart as a replica set node so that we can commit the transaction"); +node = rst.restart(node, { + noReplSet: false, + setParameter: { + recoverFromOplogAsStandalone: false, + takeUnstableCheckpointOnShutdown: false, + logComponentVerbosity: logLevel + } +}); +reconnect(node); +assert.eq(rst.getPrimary(), node); +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); + +restartServerReplication(secondary); +rst.awaitReplication(); + +assertDocsInColl1(node, [3, 4]); +assertPrepareConflictColl2(node, 1); + +assert.commandWorked(node.adminCommand({ + commitTransaction: 1, + commitTimestamp: prepareTimestamp, + lsid: session.getSessionId(), + txnNumber: NumberLong(txnNumber), + autocommit: false +})); +assert.sameMembers(getColl2(node).find().toArray(), [{_id: 1, a: 1}]); + +rst.stopSet(); +})();
\ No newline at end of file diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 3cd1dbbd937..6b8b5d3cf22 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -340,6 +340,7 @@ env.Library( 'oplog', 'oplog_application', '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/storage/storage_options', ], ) diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 378e1e3ba47..d652865c23a 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -787,30 +787,7 @@ void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx, void ReplicationCoordinatorImpl::startup(OperationContext* opCtx) { if (!isReplEnabled()) { if (ReplSettings::shouldRecoverFromOplogAsStandalone()) { - if (!_storage->supportsRecoveryTimestamp(opCtx->getServiceContext())) { - severe() << "Cannot use 'recoverFromOplogAsStandalone' with a storage engine that " - "does not support recover to stable timestamp."; - fassertFailedNoTrace(50805); - } - auto recoveryTS = _storage->getRecoveryTimestamp(opCtx->getServiceContext()); - if (!recoveryTS || recoveryTS->isNull()) { - severe() - << "Cannot use 'recoverFromOplogAsStandalone' without a stable checkpoint."; - fassertFailedNoTrace(50806); - } - - // Initialize the cached pointer to the oplog collection. - acquireOplogCollectionForLogging(opCtx); - - // We pass in "none" for the stable timestamp so that recoverFromOplog asks storage - // for the recoveryTimestamp just like on replica set recovery. - const auto stableTimestamp = boost::none; - _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp); - reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); - - warning() << "Setting mongod to readOnly mode as a result of specifying " - "'recoverFromOplogAsStandalone'."; - storageGlobalParams.readOnly = true; + _replicationProcess->getReplicationRecovery()->recoverFromOplogAsStandalone(opCtx); } stdx::lock_guard<Latch> lk(_mutex); diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index e23313602cf..a36e73238f4 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -47,6 +47,7 @@ #include "mongo/db/repl/transaction_oplog_application.h" #include "mongo/db/server_recovery.h" #include "mongo/db/session.h" +#include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/transaction_history_iterator.h" #include "mongo/db/transaction_participant.h" #include "mongo/util/log.h" @@ -207,6 +208,92 @@ ReplicationRecoveryImpl::ReplicationRecoveryImpl(StorageInterface* storageInterf ReplicationConsistencyMarkers* consistencyMarkers) : _storageInterface(storageInterface), _consistencyMarkers(consistencyMarkers) {} +void ReplicationRecoveryImpl::_assertNoRecoveryNeededOnUnstableCheckpoint(OperationContext* opCtx) { + invariant(_storageInterface->supportsRecoveryTimestamp(opCtx->getServiceContext())); + invariant(!_storageInterface->getRecoveryTimestamp(opCtx->getServiceContext())); + + if (_consistencyMarkers->getInitialSyncFlag(opCtx)) { + severe() << "Unexpected recovery needed, initial sync flag set."; + fassertFailedNoTrace(31362); + } + + const auto truncateAfterPoint = _consistencyMarkers->getOplogTruncateAfterPoint(opCtx); + if (!truncateAfterPoint.isNull()) { + severe() << "Unexpected recovery needed, oplog requires truncation. Truncate after point: " + << truncateAfterPoint.toString(); + fassertFailedNoTrace(31363); + } + + auto topOfOplogSW = _getTopOfOplog(opCtx); + if (!topOfOplogSW.isOK()) { + severe() << "Recovery not possible, no oplog found: " << topOfOplogSW.getStatus(); + fassertFailedNoTrace(31364); + } + const auto topOfOplog = topOfOplogSW.getValue(); + + const auto appliedThrough = _consistencyMarkers->getAppliedThrough(opCtx); + if (!appliedThrough.isNull() && appliedThrough != topOfOplog) { + severe() << "Unexpected recovery needed, appliedThrough is not at top of oplog, indicating " + "oplog has not been fully applied. appliedThrough: " + << appliedThrough.toString(); + fassertFailedNoTrace(31365); + } + + const auto minValid = _consistencyMarkers->getMinValid(opCtx); + if (minValid > topOfOplog) { + severe() << "Unexpected recovery needed, top of oplog is not consistent. topOfOplog: " + << topOfOplog << ", minValid: " << minValid; + fassertFailedNoTrace(31366); + } +} + +void ReplicationRecoveryImpl::recoverFromOplogAsStandalone(OperationContext* opCtx) { + if (!_storageInterface->supportsRecoveryTimestamp(opCtx->getServiceContext())) { + severe() << "Cannot use 'recoverFromOplogAsStandalone' with a storage engine that " + "does not support recover to stable timestamp."; + fassertFailedNoTrace(50805); + } + + // A non-existent recoveryTS means the checkpoint is unstable. If the recoveryTS exists but + // is null, that means a stable checkpoint was taken at a null timestamp. This should never + // happen. + auto recoveryTS = _storageInterface->getRecoveryTimestamp(opCtx->getServiceContext()); + if (recoveryTS && recoveryTS->isNull()) { + severe() << "Cannot use 'recoverFromOplogAsStandalone' with stable checkpoint at null " + << "timestamp."; + fassertFailedNoTrace(50806); + } + + // Initialize the cached pointer to the oplog collection. + acquireOplogCollectionForLogging(opCtx); + + if (recoveryTS) { + // We pass in "none" for the stable timestamp so that recoverFromOplog asks storage + // for the recoveryTimestamp just like on replica set recovery. + const auto stableTimestamp = boost::none; + recoverFromOplog(opCtx, stableTimestamp); + } else { + if (gTakeUnstableCheckpointOnShutdown) { + // Ensure 'recoverFromOplogAsStandalone' with 'takeUnstableCheckpointOnShutdown' + // is safely idempotent when it succeeds. + log() << "Recovering from unstable checkpoint with 'takeUnstableCheckpointOnShutdown'." + << " Confirming that no oplog recovery is needed."; + _assertNoRecoveryNeededOnUnstableCheckpoint(opCtx); + log() << "Not doing any oplog recovery since there is an unstable checkpoint that is " + << "up to date."; + } else { + severe() << "Cannot use 'recoverFromOplogAsStandalone' without a stable checkpoint."; + fassertFailedNoTrace(31229); + } + } + + reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); + + warning() << "Setting mongod to readOnly mode as a result of specifying " + "'recoverFromOplogAsStandalone'."; + storageGlobalParams.readOnly = true; +} + void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) try { if (_consistencyMarkers->getInitialSyncFlag(opCtx)) { diff --git a/src/mongo/db/repl/replication_recovery.h b/src/mongo/db/repl/replication_recovery.h index 0261fb5a74d..572f50ad5e1 100644 --- a/src/mongo/db/repl/replication_recovery.h +++ b/src/mongo/db/repl/replication_recovery.h @@ -55,6 +55,13 @@ public: */ virtual void recoverFromOplog(OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) = 0; + + /** + * Recovers the data on disk from the oplog and puts the node in readOnly mode. If + * 'takeUnstableCheckpointOnShutdown' is specified and an unstable checkpoint is present, + * ensures that recovery can be skipped safely. + */ + virtual void recoverFromOplogAsStandalone(OperationContext* opCtx) = 0; }; class ReplicationRecoveryImpl : public ReplicationRecovery { @@ -68,8 +75,16 @@ public: void recoverFromOplog(OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override; + void recoverFromOplogAsStandalone(OperationContext* opCtx) override; + private: /** + * Confirms that the data and oplog all indicate that the nodes has an unstable checkpoint + * that is fully up to date. + */ + void _assertNoRecoveryNeededOnUnstableCheckpoint(OperationContext* opCtx); + + /** * After truncating the oplog, completes recovery if we're recovering from a stable timestamp * or a stable checkpoint. */ diff --git a/src/mongo/db/repl/replication_recovery_mock.h b/src/mongo/db/repl/replication_recovery_mock.h index 77835215ca3..300eb52afb5 100644 --- a/src/mongo/db/repl/replication_recovery_mock.h +++ b/src/mongo/db/repl/replication_recovery_mock.h @@ -44,6 +44,8 @@ public: void recoverFromOplog(OperationContext* opCtx, boost::optional<Timestamp> stableTimestamp) override {} + + void recoverFromOplogAsStandalone(OperationContext* opCtx) override {} }; } // namespace repl diff --git a/src/mongo/db/repl/replication_recovery_test.cpp b/src/mongo/db/repl/replication_recovery_test.cpp index bf440816b12..c891c933171 100644 --- a/src/mongo/db/repl/replication_recovery_test.cpp +++ b/src/mongo/db/repl/replication_recovery_test.cpp @@ -46,6 +46,7 @@ #include "mongo/db/service_context_d_test_fixture.h" #include "mongo/db/session_catalog_mongod.h" #include "mongo/db/session_txn_record_gen.h" +#include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/stdx/memory.h" #include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" @@ -200,6 +201,8 @@ private: _consistencyMarkers.reset(); ServiceContextMongoDTest::tearDown(); + storageGlobalParams.readOnly = false; + gTakeUnstableCheckpointOnShutdown = false; } void _createOpCtx() { @@ -1185,4 +1188,143 @@ TEST_F(ReplicationRecoveryTest, ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(3, 0), 1)); } +DEATH_TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneFailsWithoutStableCheckpoint, + "Fatal Assertion 31229") { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {5}); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +DEATH_TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneFailsWithNullStableCheckpoint, + "Fatal Assertion 50806") { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(0, 0)); + _setUpOplog(opCtx, getStorageInterface(), {5}); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +TEST_F(ReplicationRecoveryTest, RecoverFromOplogAsStandaloneRecoversOplog) { + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {2, 5}); + getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(2, 2), 1)); + + recovery.recoverFromOplogAsStandalone(opCtx); + _assertDocsInTestCollection(opCtx, {5}); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); + + // Test the node is readOnly. + ASSERT_THROWS(getStorageInterface()->insertDocument(opCtx, testNs, {_makeInsertDocument(2)}, 1), + AssertionException); +} + +TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownRecoversOplog) { + gTakeUnstableCheckpointOnShutdown = true; + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {2, 5}); + getStorageInterfaceRecovery()->setRecoveryTimestamp(Timestamp(2, 2)); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(2, 2), 1)); + + recovery.recoverFromOplogAsStandalone(opCtx); + _assertDocsInTestCollection(opCtx, {5}); + ASSERT_EQ(getConsistencyMarkers()->getAppliedThrough(opCtx), OpTime(Timestamp(5, 5), 1)); +} + +TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownIsIdempotent) { + gTakeUnstableCheckpointOnShutdown = true; + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {2}); + + recovery.recoverFromOplogAsStandalone(opCtx); + _assertDocsInTestCollection(opCtx, {}); +} + +DEATH_TEST_F( + ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownFailsWithInitialSyncFlag, + "Fatal Assertion 31362") { + gTakeUnstableCheckpointOnShutdown = true; + + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + getConsistencyMarkers()->setInitialSyncFlag(opCtx); + _setUpOplog(opCtx, getStorageInterface(), {5}); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +DEATH_TEST_F( + ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownFailsWithOplogTruncateAfterPoint, + "Fatal Assertion 31363") { + gTakeUnstableCheckpointOnShutdown = true; + + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + getConsistencyMarkers()->setOplogTruncateAfterPoint(opCtx, Timestamp(4, 4)); + _setUpOplog(opCtx, getStorageInterface(), {5}); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +DEATH_TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownFailsWithEmptyOplog, + "Fatal Assertion 31364") { + gTakeUnstableCheckpointOnShutdown = true; + + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {}); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +DEATH_TEST_F( + ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownFailsWithMismatchedAppliedThrough, + "Fatal Assertion 31365") { + gTakeUnstableCheckpointOnShutdown = true; + + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {5}); + getConsistencyMarkers()->setAppliedThrough(opCtx, OpTime(Timestamp(2, 2), 1)); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + +DEATH_TEST_F(ReplicationRecoveryTest, + RecoverFromOplogAsStandaloneWithTakeUnstableCheckpointOnShutdownFailsWithHighMinValid, + "Fatal Assertion 31366") { + gTakeUnstableCheckpointOnShutdown = true; + + ReplicationRecoveryImpl recovery(getStorageInterface(), getConsistencyMarkers()); + auto opCtx = getOperationContext(); + + _setUpOplog(opCtx, getStorageInterface(), {5}); + getConsistencyMarkers()->setMinValid(opCtx, OpTime(Timestamp(20, 20), 1)); + + recovery.recoverFromOplogAsStandalone(opCtx); +} + } // namespace diff --git a/src/mongo/db/storage/storage_parameters.idl b/src/mongo/db/storage/storage_parameters.idl index 1bc6fad4981..dd1298bc1e2 100644 --- a/src/mongo/db/storage/storage_parameters.idl +++ b/src/mongo/db/storage/storage_parameters.idl @@ -56,3 +56,9 @@ server_parameters: validator: gte: 1 lte: { expr: 'StorageGlobalParams::kMaxJournalCommitIntervalMs' } + takeUnstableCheckpointOnShutdown: + description: 'Take unstable checkpoint on shutdown' + cpp_vartype: bool + cpp_varname: gTakeUnstableCheckpointOnShutdown + set_at: startup + default: false diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index a284f4a2eb5..f14adfbfe22 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -69,6 +69,7 @@ #include "mongo/db/storage/journal_listener.h" #include "mongo/db/storage/storage_file_util.h" #include "mongo/db/storage/storage_options.h" +#include "mongo/db/storage/storage_parameters_gen.h" #include "mongo/db/storage/storage_repair_observer.h" #include "mongo/db/storage/wiredtiger/wiredtiger_cursor.h" #include "mongo/db/storage/wiredtiger/wiredtiger_customization_hooks.h" diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.idl b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.idl index ca638b62195..c7706b08b7a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.idl +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_parameters.idl @@ -71,12 +71,6 @@ server_parameters: expr: 'kDebugBuild ? 5 : 300' validator: gte: 0 - takeUnstableCheckpointOnShutdown: - description: 'Take unstable checkpoint on shutdown' - cpp_vartype: bool - cpp_varname: gTakeUnstableCheckpointOnShutdown - set_at: startup - default: false # The "wiredTigerCursorCacheSize" parameter has the following meaning. # |