author | Judah Schvimer <judah@mongodb.com> | 2018-03-27 18:13:40 -0400
---|---|---
committer | Judah Schvimer <judah@mongodb.com> | 2018-04-23 16:14:19 -0400
commit | 41d77e2940fbcbd691d4b2f01e9ec1dd5ef67e47 (patch) |
tree | eb9b9ed8f47d46c41617c17cf352178beafea6d0 |
parent | 916b24d822b0a776ca7c5aaa11cfaafe3b963e93 (diff) |
download | mongo-41d77e2940fbcbd691d4b2f01e9ec1dd5ef67e47.tar.gz |
SERVER-34070 Add flag to perform replication recovery as a standalone
-rw-r--r-- | jstests/noPassthrough/standalone_replication_recovery.js | 131
-rw-r--r-- | jstests/replsets/clean_shutdown_oplog_state.js | 18
-rw-r--r-- | jstests/replsets/temp_namespace_restart_as_standalone.js | 14
-rw-r--r-- | src/mongo/db/mongod_options.cpp | 14
-rw-r--r-- | src/mongo/db/repl/oplog.cpp | 14
-rw-r--r-- | src/mongo/db/repl/repl_settings.cpp | 8
-rw-r--r-- | src/mongo/db/repl/repl_settings.h | 4
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 24
-rw-r--r-- | src/mongo/db/repl/replication_recovery.cpp | 2
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp | 10
-rw-r--r-- | src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp | 12
11 files changed, 219 insertions, 32 deletions
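Before the diff itself, a minimal usage sketch pieced together from the tests below. The dbpath and database name are placeholders, not part of the commit; the MongoRunner options mirror the ones the new tests pass. The idea: start a mongod on the data files of a former replica set member, without --replSet but with the new switch, and it replays oplog entries past the last stable checkpoint, then serves reads in read-only mode.

```javascript
// Minimal sketch, assuming the dbpath of a former replica set member.
// "/data/db/rs0-member" is a placeholder path, not taken from the commit.
const conn = MongoRunner.runMongod({
    dbpath: "/data/db/rs0-member",     // existing data files from a replica set member
    noCleanData: true,                 // keep those files rather than wiping the dbpath
    recoverFromOplogAsStandalone: "",  // the moe::Switch flag added by this commit
});
assert.neq(null, conn, "standalone failed to start with oplog recovery");

// After recovery the server is read-only: reads see the recovered writes,
// but inserts are rejected with IllegalOperation (as the new test asserts).
assert.commandFailedWithCode(conn.getDB("test").coll.insert({_id: 1}),
                             ErrorCodes.IllegalOperation);
MongoRunner.stopMongod(conn);
```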
diff --git a/jstests/noPassthrough/standalone_replication_recovery.js b/jstests/noPassthrough/standalone_replication_recovery.js
new file mode 100644
index 00000000000..18b19dc1b6f
--- /dev/null
+++ b/jstests/noPassthrough/standalone_replication_recovery.js
@@ -0,0 +1,131 @@
+/*
+ * Tests that a standalone succeeds when passed the 'recoverFromOplogAsStandalone' parameter.
+ *
+ * This test only makes sense for storage engines that support recover to stable timestamp.
+ * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication]
+ */
+
+(function() {
+    "use strict";
+    load("jstests/replsets/rslib.js");
+    load("jstests/libs/write_concern_util.js");
+
+    const name = 'standalone_replication_recovery';
+    const dbName = name;
+    const collName = 'srr_coll';
+    const logLevel = tojson({storage: {recovery: 2}});
+
+    // We must explicitly set the flag to null each time because ReplSetTest remembers options.
+    const on = "";
+    const off = null;
+
+    const rst = new ReplSetTest({
+        nodes: 2,
+    });
+
+    function getColl(conn) {
+        return conn.getDB(dbName)[collName];
+    }
+
+    function assertDocsInColl(node, nums) {
+        let results = getColl(node).find().sort({_id: 1}).toArray();
+        let expected = nums.map((i) => ({_id: i}));
+        if (!friendlyEqual(results, expected)) {
+            rst.dumpOplog(node, {}, 100);
+        }
+        assert.eq(results, expected, "actual (left) != expected (right)");
+    }
+
+    jsTestLog("Test that an empty standalone fails trying to recover.");
+    assert.throws(() => rst.start(0, {noReplSet: true, recoverFromOplogAsStandalone: on}));
+
+    jsTestLog("Initiating as a replica set.");
+    // Restart as a replica set node without the flag so we can add operations to the oplog.
+    let nodes = rst.startSet({setParameter: {logComponentVerbosity: logLevel}});
+    let node = nodes[0];
+    let secondary = nodes[1];
+    rst.initiate({
+        _id: name,
+        members: [{_id: 0, host: node.host}, {_id: 2, host: secondary.host, priority: 0}]
+    });
+
+    // Create the collection with w:majority and then perform a clean restart to ensure that
+    // the collection is in a stable checkpoint.
+    assert.commandWorked(node.getDB(dbName).runCommand({
+        create: collName,
+        writeConcern: {w: "majority", wtimeout: ReplSetTest.kDefaultTimeoutMS}
+    }));
+    assertDocsInColl(node, []);
+    node = rst.restart(node, {"noReplSet": false});
+    reconnect(node);
+    assert.eq(rst.getPrimary(), node);
+
+    // Keep node 0 the primary, but prevent it from committing any writes.
+    stopServerReplication(secondary);
+
+    assert.commandWorked(getColl(node).insert({_id: 3}, {writeConcern: {w: 1, j: 1}}));
+    assert.commandWorked(getColl(node).insert({_id: 4}, {writeConcern: {w: 1, j: 1}}));
+    assert.commandWorked(getColl(node).insert({_id: 5}, {writeConcern: {w: 1, j: 1}}));
+    assertDocsInColl(node, [3, 4, 5]);
+
+    jsTestLog("Test that if we kill the node, recovery still plays.");
+    rst.stop(node, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL});
+    node = rst.restart(node, {"noReplSet": false});
+    reconnect(node);
+    assert.eq(rst.getPrimary(), node);
+    assertDocsInColl(node, [3, 4, 5]);
+
+    jsTestLog("Test that a replica set node cannot start up with the parameter set.");
+    assert.throws(() => rst.restart(0, {recoverFromOplogAsStandalone: on}));
+
+    jsTestLog("Test that on restart as a standalone we only see committed writes by default.");
+    node = rst.start(node, {noReplSet: true}, true);
+    reconnect(node);
+    assertDocsInColl(node, []);
+
+    jsTestLog("Test that on restart with the flag set we play recovery.");
+    node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+    reconnect(node);
+    assertDocsInColl(node, [3, 4, 5]);
+
+    jsTestLog("Test that we go into read-only mode.");
+    assert.commandFailedWithCode(getColl(node).insert({_id: 1}), ErrorCodes.IllegalOperation);
+
+    jsTestLog("Test that on restart after standalone recovery we do not see replicated writes.");
+    node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: off});
+    reconnect(node);
+    assertDocsInColl(node, []);
+    assert.commandWorked(getColl(node).insert({_id: 6}));
+    assertDocsInColl(node, [6]);
+    node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+    reconnect(node);
+    assertDocsInColl(node, [3, 4, 5, 6]);
+
+    jsTestLog("Test that we can restart again as a replica set node.");
+    node = rst.restart(node, {noReplSet: false, recoverFromOplogAsStandalone: off});
+    reconnect(node);
+    assert.eq(rst.getPrimary(), node);
+    assertDocsInColl(node, [3, 4, 5, 6]);
+
+    jsTestLog("Test that we can still recover as a standalone.");
+    assert.commandWorked(getColl(node).insert({_id: 7}));
+    assertDocsInColl(node, [3, 4, 5, 6, 7]);
+    node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: off});
+    reconnect(node);
+    assertDocsInColl(node, [6]);
+    node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+    reconnect(node);
+    assertDocsInColl(node, [3, 4, 5, 6, 7]);
+
+    jsTestLog("Restart as a replica set node so that the test can complete successfully.");
+    node = rst.restart(node, {noReplSet: false, recoverFromOplogAsStandalone: off});
+    reconnect(node);
+    assert.eq(rst.getPrimary(), node);
+    assertDocsInColl(node, [3, 4, 5, 6, 7]);
+
+    restartServerReplication(secondary);
+
+    // Skip checking db hashes since we do a write as a standalone.
+    TestData.skipCheckDBHashes = true;
+    rst.stopSet();
+})();
\ No newline at end of file
diff --git a/jstests/replsets/clean_shutdown_oplog_state.js b/jstests/replsets/clean_shutdown_oplog_state.js
index 76d30c123b0..6aec7f6dce3 100644
--- a/jstests/replsets/clean_shutdown_oplog_state.js
+++ b/jstests/replsets/clean_shutdown_oplog_state.js
@@ -3,13 +3,7 @@
 // present without this test failing. In particular if the rst.stop(1) doesn't execute mid-batch,
 // it isn't fully exercising the code. However, if the test fails there is definitely a bug.
 //
-// SERVER-33525: Adding `requires_mmapv1`. This test shuts down MongoD while replicating and
-// brings it back up as a standalone. Then it asserts the documents in the collection exactly
-// match the entries in the oplog. With RTT, this assertion will only hold if the commit point is
-// also advancing at the same pace. However, there may be other, softer assertions this test can
-// make instead going forward.
-//
-// @tags: [requires_persistence, requires_mmapv1]
+// @tags: [requires_persistence]
 
 (function() {
     "use strict";
@@ -63,6 +57,12 @@
     var options = slave.savedOptions;
     options.noCleanData = true;
     delete options.replSet;
+
+    var storageEngine = jsTest.options().storageEngine || "wiredTiger";
+    if (storageEngine === "wiredTiger") {
+        options.recoverFromOplogAsStandalone = "";
+    }
+
     var conn = MongoRunner.runMongod(options);
     assert.neq(null, conn, "secondary failed to start");
@@ -88,7 +88,9 @@
     try {
         assert.eq(collDoc._id, oplogDoc.o._id);
 
         assert(!('begin' in minValidDoc), 'begin in minValidDoc');
-        assert.eq(minValidDoc.ts, oplogDoc.ts);
+        if (storageEngine !== "wiredTiger") {
+            assert.eq(minValidDoc.ts, oplogDoc.ts);
+        }
 
         assert.eq(oplogTruncateAfterPointDoc.oplogTruncateAfterPoint, Timestamp());
     } catch (e) {
         // TODO remove once SERVER-25777 is resolved.
diff --git a/jstests/replsets/temp_namespace_restart_as_standalone.js b/jstests/replsets/temp_namespace_restart_as_standalone.js
index 46144cc8b64..8c91465882c 100644
--- a/jstests/replsets/temp_namespace_restart_as_standalone.js
+++ b/jstests/replsets/temp_namespace_restart_as_standalone.js
@@ -2,11 +2,7 @@
  * Tests that temporary collections are not dropped when a member of a replica set is started up as
  * a stand-alone mongod, i.e. without the --replSet parameter.
  *
- * This test restarts a node as a standalone. With RTT, standalones start up at a time in the past
- * since they do not perform replication recovery, so we must only run it with
- * mmapv1. SERVER-34070 will make this feasible to test again on RTT storage engines.
- *
- * @tags: [requires_persistence, requires_mmapv1]
+ * @tags: [requires_persistence]
  */
 (function() {
     var rst = new ReplSetTest({nodes: 2});
@@ -59,7 +55,13 @@
     var secondaryNodeId = rst.getNodeId(secondaryDB.getMongo());
     rst.stop(secondaryNodeId);
 
-    secondaryConn = MongoRunner.runMongod({dbpath: secondaryConn.dbpath, noCleanData: true});
+    var storageEngine = jsTest.options().storageEngine || "wiredTiger";
+    if (storageEngine === "wiredTiger") {
+        secondaryConn = MongoRunner.runMongod(
+            {dbpath: secondaryConn.dbpath, noCleanData: true, recoverFromOplogAsStandalone: ""});
+    } else {
+        secondaryConn = MongoRunner.runMongod({dbpath: secondaryConn.dbpath, noCleanData: true});
+    }
 
     assert.neq(null, secondaryConn, "secondary failed to start up as a stand-alone mongod");
     secondaryDB = secondaryConn.getDB("test");
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index 141fa00ae38..61a1df60860 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -364,6 +364,15 @@ Status addMongodOptions(moe::OptionSection* options) {
                      "size to use (in MB) for replication op log. default is 5% of disk space "
                      "(i.e. large is good)");
 
+    replication_options
+        .addOptionChaining("replication.recoverFromOplogAsStandalone",
+                           "recoverFromOplogAsStandalone",
+                           moe::Switch,
+                           "specifies that a standalone should execute replication recovery")
+        .hidden()
+        .incompatibleWith("replication.replSet")
+        .incompatibleWith("replication.replSetName");
+
     rs_options
         .addOptionChaining("replication.replSet",
                            "replSet",
@@ -1062,6 +1071,11 @@ Status storeMongodOptions(const moe::Environment& params) {
         replSettings.setOplogSizeBytes(x * 1024 * 1024);
         invariant(replSettings.getOplogSizeBytes() > 0);
     }
+    if (params.count("replication.recoverFromOplogAsStandalone")) {
+        replSettings.setShouldRecoverFromOplogAsStandalone(
+            params["replication.recoverFromOplogAsStandalone"].as<bool>());
+    }
+
     if (params.count("cacheSize")) {
         long x = params["cacheSize"].as<long>();
         if (x <= 0) {
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index e793ab4a6f4..6e083990f4e 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -1144,7 +1144,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
     // the individual operations will not contain a `ts` field. The caller is responsible for
     // setting the timestamp before committing. Assigning a competing timestamp in this
     // codepath would break that atomicity. Sharding is a consumer of this use-case.
-    const bool assignOperationTimestamp = [opCtx, haveWrappingWriteUnitOfWork] {
+    const bool assignOperationTimestamp = [opCtx, haveWrappingWriteUnitOfWork, mode] {
         const auto replMode = ReplicationCoordinator::get(opCtx)->getReplicationMode();
         if (opCtx->writesAreReplicated()) {
             // We do not assign timestamps on replicated writes since they will get their oplog
@@ -1163,8 +1163,9 @@
                 break;
             }
             case ReplicationCoordinator::modeNone: {
-                // We do not assign timestamps on standalones.
-                return false;
+                // Only assign timestamps on standalones during replication recovery when
+                // started with 'recoverFromOplogAsStandalone'.
+                return mode == OplogApplication::Mode::kRecovering;
             }
         }
     }
@@ -1553,7 +1554,7 @@ Status applyCommand_inlock(OperationContext* opCtx,
         }
     }
 
-    const bool assignCommandTimestamp = [opCtx] {
+    const bool assignCommandTimestamp = [opCtx, mode] {
         const auto replMode = ReplicationCoordinator::get(opCtx)->getReplicationMode();
         if (opCtx->writesAreReplicated()) {
             // We do not assign timestamps on replicated writes since they will get their oplog
@@ -1570,8 +1571,9 @@
                 return true;
             }
             case ReplicationCoordinator::modeNone: {
-                // We do not assign timestamps on standalones.
-                return false;
+                // Only assign timestamps on standalones during replication recovery when
+                // started with 'recoverFromOplogAsStandalone'.
+                return mode == OplogApplication::Mode::kRecovering;
             }
         }
         MONGO_UNREACHABLE;
diff --git a/src/mongo/db/repl/repl_settings.cpp b/src/mongo/db/repl/repl_settings.cpp
index 3fb69df5d3d..bf64b2bf057 100644
--- a/src/mongo/db/repl/repl_settings.cpp
+++ b/src/mongo/db/repl/repl_settings.cpp
@@ -60,6 +60,10 @@ std::string ReplSettings::getReplSetString() const {
     return _replSetString;
 }
 
+bool ReplSettings::getShouldRecoverFromOplogAsStandalone() const {
+    return _shouldRecoverFromOplogAsStandalone;
+}
+
 ReplSettings::IndexPrefetchConfig ReplSettings::getPrefetchIndexMode() const {
     return _prefetchIndexMode;
 }
@@ -80,6 +84,10 @@ void ReplSettings::setReplSetString(std::string replSetString) {
     _replSetString = replSetString;
 }
 
+void ReplSettings::setShouldRecoverFromOplogAsStandalone(bool shouldRecover) {
+    _shouldRecoverFromOplogAsStandalone = shouldRecover;
+}
+
 void ReplSettings::setPrefetchIndexMode(std::string prefetchIndexModeString) {
     if (prefetchIndexModeString.empty()) {
         _prefetchIndexMode = IndexPrefetchConfig::UNINITIALIZED;
diff --git a/src/mongo/db/repl/repl_settings.h b/src/mongo/db/repl/repl_settings.h
index 1a74414dade..9f75c10d712 100644
--- a/src/mongo/db/repl/repl_settings.h
+++ b/src/mongo/db/repl/repl_settings.h
@@ -56,6 +56,7 @@ public:
      */
     long long getOplogSizeBytes() const;
    std::string getReplSetString() const;
+    bool getShouldRecoverFromOplogAsStandalone() const;
 
     /**
      * Note: _prefetchIndexMode is initialized to UNINITIALIZED by default.
@@ -75,12 +76,15 @@ public:
     void setOplogSizeBytes(long long oplogSizeBytes);
     void setReplSetString(std::string replSetString);
     void setPrefetchIndexMode(std::string prefetchIndexModeString);
+    void setShouldRecoverFromOplogAsStandalone(bool shouldRecover);
 
 private:
     long long _oplogSizeBytes = 0;  // --oplogSize
 
     std::string _replSetString;  // --replSet[/<seedlist>]
 
+    bool _shouldRecoverFromOplogAsStandalone = false;  // --recoverFromOplogAsStandalone
+
     // --indexPrefetch
     IndexPrefetchConfig _prefetchIndexMode = IndexPrefetchConfig::UNINITIALIZED;
 };
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index b52d9e05b37..6922a0f51ef 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -125,7 +125,6 @@ MONGO_INITIALIZER(periodicNoopIntervalSecs)(InitializerContext*) {
     return Status::OK();
 }
 
-
 /**
  * Allows non-local writes despite _canAcceptNonlocalWrites being false on a single OperationContext
  * while in scope.
@@ -738,11 +737,34 @@ void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
 
 void ReplicationCoordinatorImpl::startup(OperationContext* opCtx) {
     if (!isReplEnabled()) {
+        if (_settings.getShouldRecoverFromOplogAsStandalone()) {
+            if (!_storage->supportsRecoverToStableTimestamp(opCtx->getServiceContext())) {
+                severe() << "Cannot use 'recoverFromOplogAsStandalone' with a storage engine that "
+                            "does not support recover to stable timestamp.";
+                fassertFailedNoTrace(50805);
+            }
+            auto recoveryTS = _storage->getRecoveryTimestamp(opCtx->getServiceContext());
+            if (!recoveryTS || recoveryTS->isNull()) {
+                severe()
+                    << "Cannot use 'recoverFromOplogAsStandalone' without a stable checkpoint.";
+                fassertFailedNoTrace(50806);
+            }
+
+            // We pass in "none" for the stable timestamp so that recoverFromOplog asks storage
+            // for the recoveryTimestamp just like on replica set recovery.
+            const auto stableTimestamp = boost::none;
+            _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp);
+            warning() << "Setting mongod to readOnly mode as a result of specifying "
+                         "'recoverFromOplogAsStandalone'.";
+            storageGlobalParams.readOnly = true;
+        }
+
         stdx::lock_guard<stdx::mutex> lk(_mutex);
         _setConfigState_inlock(kConfigReplicationDisabled);
         return;
     }
 
     invariant(_settings.usingReplSets());
+    invariant(!_settings.getShouldRecoverFromOplogAsStandalone());
 
     {
         stdx::lock_guard<stdx::mutex> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index e8f010f2b11..24bc1c8d60b 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -84,7 +84,7 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx,
         topOfOplogSW.getStatus() == ErrorCodes::NamespaceNotFound) {
         // Oplog is empty. There are no oplog entries to apply, so we exit recovery and go into
         // initial sync.
-        log() << "No oplog entries to apply for recovery. Oplog is empty. Entering initial sync.";
+        log() << "No oplog entries to apply for recovery. Oplog is empty.";
         return;
     }
     fassert(40290, topOfOplogSW);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index 938b49876c4..c98617bdc78 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -222,8 +222,9 @@ StatusWith<std::string> WiredTigerIndex::generateCreateString(const std::string&
        << "formatVersion=" << keyStringVersion << ','
        << "infoObj=" << desc.infoObj().jsonString() << "),";
 
-    if (WiredTigerUtil::useTableLogging(NamespaceString(desc.parentNS()),
-                                        getGlobalReplSettings().usingReplSets())) {
+    bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+        getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
+    if (WiredTigerUtil::useTableLogging(NamespaceString(desc.parentNS()), replicatedWrites)) {
         ss << "log=(enabled=true)";
     } else {
         ss << "log=(enabled=false)";
@@ -272,11 +273,12 @@ WiredTigerIndex::WiredTigerIndex(OperationContext* ctx,
         version.getValue() == kKeyStringV1Version ?
            KeyString::Version::V1 : KeyString::Version::V0;
 
     if (!isReadOnly) {
+        bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+            getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
         uassertStatusOK(WiredTigerUtil::setTableLogging(
             ctx,
             uri,
-            WiredTigerUtil::useTableLogging(NamespaceString(desc->parentNS()),
-                                            getGlobalReplSettings().usingReplSets())));
+            WiredTigerUtil::useTableLogging(NamespaceString(desc->parentNS()), replicatedWrites)));
     }
 }
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 717adb8c4bd..bbefc5c9b11 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -595,8 +595,9 @@ StatusWith<std::string> WiredTigerRecordStore::generateCreateString(
     }
     ss << ")";
 
-    if (WiredTigerUtil::useTableLogging(NamespaceString(ns),
-                                        getGlobalReplSettings().usingReplSets())) {
+    bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+        getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
+    if (WiredTigerUtil::useTableLogging(NamespaceString(ns), replicatedWrites)) {
         ss << ",log=(enabled=true)";
     } else {
         ss << ",log=(enabled=false)";
@@ -647,11 +648,10 @@ WiredTigerRecordStore::WiredTigerRecordStore(WiredTigerKVEngine* kvEngine,
     }
 
     if (!params.isReadOnly) {
+        bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+            getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
         uassertStatusOK(WiredTigerUtil::setTableLogging(
-            ctx,
-            _uri,
-            WiredTigerUtil::useTableLogging(NamespaceString(ns()),
-                                            getGlobalReplSettings().usingReplSets())));
+            ctx, _uri, WiredTigerUtil::useTableLogging(NamespaceString(ns()), replicatedWrites)));
     }
 
     if (_isOplog) {
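For completeness, a hedged sketch of the guardrails visible in this diff: the option is declared incompatibleWith replication.replSet and replication.replSetName, so it cannot be combined with --replSet, and startup fasserts when the storage engine cannot recover to a stable timestamp (50805) or when no stable checkpoint exists (50806). The snippet below mirrors the first assertion in standalone_replication_recovery.js above.

```javascript
// Sketch based on the new test: an empty standalone has no stable checkpoint
// to recover from, so starting it with the flag fasserts (error code 50806)
// and ReplSetTest.start() throws.
const rst = new ReplSetTest({nodes: 1});
assert.throws(() => rst.start(0, {noReplSet: true, recoverFromOplogAsStandalone: ""}));
```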