summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Judah Schvimer <judah@mongodb.com> 2018-03-27 18:13:40 -0400
committer Judah Schvimer <judah@mongodb.com> 2018-04-23 16:14:19 -0400
commit 41d77e2940fbcbd691d4b2f01e9ec1dd5ef67e47 (patch)
tree eb9b9ed8f47d46c41617c17cf352178beafea6d0
parent 916b24d822b0a776ca7c5aaa11cfaafe3b963e93 (diff)
download mongo-41d77e2940fbcbd691d4b2f01e9ec1dd5ef67e47.tar.gz
SERVER-34070 Add flag to perform replication recovery as a standalone
-rw-r--r-- jstests/noPassthrough/standalone_replication_recovery.js 131
-rw-r--r-- jstests/replsets/clean_shutdown_oplog_state.js 18
-rw-r--r-- jstests/replsets/temp_namespace_restart_as_standalone.js 14
-rw-r--r-- src/mongo/db/mongod_options.cpp 14
-rw-r--r-- src/mongo/db/repl/oplog.cpp 14
-rw-r--r-- src/mongo/db/repl/repl_settings.cpp 8
-rw-r--r-- src/mongo/db/repl/repl_settings.h 4
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 24
-rw-r--r-- src/mongo/db/repl/replication_recovery.cpp 2
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp 10
-rw-r--r-- src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp 12
11 files changed, 219 insertions, 32 deletions
diff --git a/jstests/noPassthrough/standalone_replication_recovery.js b/jstests/noPassthrough/standalone_replication_recovery.js
new file mode 100644
index 00000000000..18b19dc1b6f
--- /dev/null
+++ b/jstests/noPassthrough/standalone_replication_recovery.js
@@ -0,0 +1,131 @@
+/*
+ * Tests that a standalone succeeds when passed the 'recoverFromOplogAsStandalone' parameter.
+ *
+ * This test only makes sense for storage engines that support recover to stable timestamp.
+ * @tags: [requires_wiredtiger, requires_persistence, requires_journaling, requires_replication]
+ */
+
+(function() {
+ "use strict";
+ load("jstests/replsets/rslib.js");
+ load("jstests/libs/write_concern_util.js");
+
+ const name = 'standalone_replication_recovery';
+ const dbName = name;
+ const collName = 'srr_coll';
+ const logLevel = tojson({storage: {recovery: 2}});
+
+ // We must explicitly set the flag to null each time because ReplSetTest remembers options.
+ const on = "";
+ const off = null;
+
+ const rst = new ReplSetTest({
+ nodes: 2,
+ });
+
+ function getColl(conn) {
+ return conn.getDB(dbName)[collName];
+ }
+
+ function assertDocsInColl(node, nums) {
+ let results = getColl(node).find().sort({_id: 1}).toArray();
+ let expected = nums.map((i) => ({_id: i}));
+ if (!friendlyEqual(results, expected)) {
+ rst.dumpOplog(node, {}, 100);
+ }
+ assert.eq(results, expected, "actual (left) != expected (right)");
+ }
+
+ jsTestLog("Test that an empty standalone fails trying to recover.");
+ assert.throws(() => rst.start(0, {noReplSet: true, recoverFromOplogAsStandalone: on}));
+
+ jsTestLog("Initiating as a replica set.");
+ // Restart as a replica set node without the flag so we can add operations to the oplog.
+ let nodes = rst.startSet({setParameter: {logComponentVerbosity: logLevel}});
+ let node = nodes[0];
+ let secondary = nodes[1];
+ rst.initiate({
+ _id: name,
+ members: [{_id: 0, host: node.host}, {_id: 2, host: secondary.host, priority: 0}]
+ });
+
+ // Create the collection with w:majority and then perform a clean restart to ensure that
+ // the collection is in a stable checkpoint.
+ assert.commandWorked(node.getDB(dbName).runCommand({
+ create: collName,
+ writeConcern: {w: "majority", wtimeout: ReplSetTest.kDefaultTimeoutMS}
+ }));
+ assertDocsInColl(node, []);
+ node = rst.restart(node, {"noReplSet": false});
+ reconnect(node);
+ assert.eq(rst.getPrimary(), node);
+
+ // Keep node 0 the primary, but prevent it from committing any writes.
+ stopServerReplication(secondary);
+
+ assert.commandWorked(getColl(node).insert({_id: 3}, {writeConcern: {w: 1, j: 1}}));
+ assert.commandWorked(getColl(node).insert({_id: 4}, {writeConcern: {w: 1, j: 1}}));
+ assert.commandWorked(getColl(node).insert({_id: 5}, {writeConcern: {w: 1, j: 1}}));
+ assertDocsInColl(node, [3, 4, 5]);
+
+ jsTestLog("Test that if we kill the node, recovery still plays.");
+ rst.stop(node, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL});
+ node = rst.restart(node, {"noReplSet": false});
+ reconnect(node);
+ assert.eq(rst.getPrimary(), node);
+ assertDocsInColl(node, [3, 4, 5]);
+
+ jsTestLog("Test that a replica set node cannot start up with the parameter set.");
+ assert.throws(() => rst.restart(0, {recoverFromOplogAsStandalone: on}));
+
+ jsTestLog("Test that on restart as a standalone we only see committed writes by default.");
+ node = rst.start(node, {noReplSet: true}, true);
+ reconnect(node);
+ assertDocsInColl(node, []);
+
+ jsTestLog("Test that on restart with the flag set we play recovery.");
+ node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+ reconnect(node);
+ assertDocsInColl(node, [3, 4, 5]);
+
+ jsTestLog("Test that we go into read-only mode.");
+ assert.commandFailedWithCode(getColl(node).insert({_id: 1}), ErrorCodes.IllegalOperation);
+
+ jsTestLog("Test that on restart after standalone recovery we do not see replicated writes.");
+ node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: off});
+ reconnect(node);
+ assertDocsInColl(node, []);
+ assert.commandWorked(getColl(node).insert({_id: 6}));
+ assertDocsInColl(node, [6]);
+ node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+ reconnect(node);
+ assertDocsInColl(node, [3, 4, 5, 6]);
+
+ jsTestLog("Test that we can restart again as a replica set node.");
+ node = rst.restart(node, {noReplSet: false, recoverFromOplogAsStandalone: off});
+ reconnect(node);
+ assert.eq(rst.getPrimary(), node);
+ assertDocsInColl(node, [3, 4, 5, 6]);
+
+ jsTestLog("Test that we can still recover as a standalone.");
+ assert.commandWorked(getColl(node).insert({_id: 7}));
+ assertDocsInColl(node, [3, 4, 5, 6, 7]);
+ node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: off});
+ reconnect(node);
+ assertDocsInColl(node, [6]);
+ node = rst.restart(node, {noReplSet: true, recoverFromOplogAsStandalone: on});
+ reconnect(node);
+ assertDocsInColl(node, [3, 4, 5, 6, 7]);
+
+ jsTestLog("Restart as a replica set node so that the test can complete successfully.");
+ node = rst.restart(node, {noReplSet: false, recoverFromOplogAsStandalone: off});
+ reconnect(node);
+ assert.eq(rst.getPrimary(), node);
+ assertDocsInColl(node, [3, 4, 5, 6, 7]);
+
+ restartServerReplication(secondary);
+
+ // Skip checking db hashes since we do a write as a standalone.
+ TestData.skipCheckDBHashes = true;
+ rst.stopSet();
+})();
\ No newline at end of file
diff --git a/jstests/replsets/clean_shutdown_oplog_state.js b/jstests/replsets/clean_shutdown_oplog_state.js
index 76d30c123b0..6aec7f6dce3 100644
--- a/jstests/replsets/clean_shutdown_oplog_state.js
+++ b/jstests/replsets/clean_shutdown_oplog_state.js
@@ -3,13 +3,7 @@
// present without this test failing. In particular if the rst.stop(1) doesn't execute mid-batch,
// it isn't fully exercising the code. However, if the test fails there is definitely a bug.
//
-// SERVER-33525: Adding `requires_mmapv1`. This test shuts down MongoD while replicating and
-// brings it back up as a standalone. Then it asserts the documents in the collection exactly
-// match the entries in the oplog. With RTT, this assertion will only hold if the commit point is
-// also advancing at the same pace. However, there may be other, softer assertions this test can
-// make instead going forward.
-//
-// @tags: [requires_persistence, requires_mmapv1]
+// @tags: [requires_persistence]
(function() {
"use strict";
@@ -63,6 +57,12 @@
var options = slave.savedOptions;
options.noCleanData = true;
delete options.replSet;
+
+ var storageEngine = jsTest.options().storageEngine || "wiredTiger";
+ if (storageEngine === "wiredTiger") {
+ options.recoverFromOplogAsStandalone = "";
+ }
+
var conn = MongoRunner.runMongod(options);
assert.neq(null, conn, "secondary failed to start");
@@ -88,7 +88,9 @@
try {
assert.eq(collDoc._id, oplogDoc.o._id);
assert(!('begin' in minValidDoc), 'begin in minValidDoc');
- assert.eq(minValidDoc.ts, oplogDoc.ts);
+ if (storageEngine !== "wiredTiger") {
+ assert.eq(minValidDoc.ts, oplogDoc.ts);
+ }
assert.eq(oplogTruncateAfterPointDoc.oplogTruncateAfterPoint, Timestamp());
} catch (e) {
// TODO remove once SERVER-25777 is resolved.
diff --git a/jstests/replsets/temp_namespace_restart_as_standalone.js b/jstests/replsets/temp_namespace_restart_as_standalone.js
index 46144cc8b64..8c91465882c 100644
--- a/jstests/replsets/temp_namespace_restart_as_standalone.js
+++ b/jstests/replsets/temp_namespace_restart_as_standalone.js
@@ -2,11 +2,7 @@
* Tests that temporary collections are not dropped when a member of a replica set is started up as
* a stand-alone mongod, i.e. without the --replSet parameter.
*
- * This test restarts a node as a standalone. With RTT, standalones start up at a time in the past
- * since they do not perform replication recovery, so we must only run it with
- * mmapv1. SERVER-34070 will make this feasible to test again on RTT storage engines.
- *
- * @tags: [requires_persistence, requires_mmapv1]
+ * @tags: [requires_persistence]
*/
(function() {
var rst = new ReplSetTest({nodes: 2});
@@ -59,7 +55,13 @@
var secondaryNodeId = rst.getNodeId(secondaryDB.getMongo());
rst.stop(secondaryNodeId);
- secondaryConn = MongoRunner.runMongod({dbpath: secondaryConn.dbpath, noCleanData: true});
+ var storageEngine = jsTest.options().storageEngine || "wiredTiger";
+ if (storageEngine === "wiredTiger") {
+ secondaryConn = MongoRunner.runMongod(
+ {dbpath: secondaryConn.dbpath, noCleanData: true, recoverFromOplogAsStandalone: ""});
+ } else {
+ secondaryConn = MongoRunner.runMongod({dbpath: secondaryConn.dbpath, noCleanData: true});
+ }
assert.neq(null, secondaryConn, "secondary failed to start up as a stand-alone mongod");
secondaryDB = secondaryConn.getDB("test");
diff --git a/src/mongo/db/mongod_options.cpp b/src/mongo/db/mongod_options.cpp
index 141fa00ae38..61a1df60860 100644
--- a/src/mongo/db/mongod_options.cpp
+++ b/src/mongo/db/mongod_options.cpp
@@ -364,6 +364,15 @@ Status addMongodOptions(moe::OptionSection* options) {
"size to use (in MB) for replication op log. default is 5% of disk space "
"(i.e. large is good)");
+ replication_options
+ .addOptionChaining("replication.recoverFromOplogAsStandalone",
+ "recoverFromOplogAsStandalone",
+ moe::Switch,
+ "specifies that a standalone should execute replication recovery")
+ .hidden()
+ .incompatibleWith("replication.replSet")
+ .incompatibleWith("replication.replSetName");
+
rs_options
.addOptionChaining("replication.replSet",
"replSet",
@@ -1062,6 +1071,11 @@ Status storeMongodOptions(const moe::Environment& params) {
replSettings.setOplogSizeBytes(x * 1024 * 1024);
invariant(replSettings.getOplogSizeBytes() > 0);
}
+ if (params.count("replication.recoverFromOplogAsStandalone")) {
+ replSettings.setShouldRecoverFromOplogAsStandalone(
+ params["replication.recoverFromOplogAsStandalone"].as<bool>());
+ }
+
if (params.count("cacheSize")) {
long x = params["cacheSize"].as<long>();
if (x <= 0) {
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index e793ab4a6f4..6e083990f4e 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -1144,7 +1144,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
// the individual operations will not contain a `ts` field. The caller is responsible for
// setting the timestamp before committing. Assigning a competing timestamp in this
// codepath would break that atomicity. Sharding is a consumer of this use-case.
- const bool assignOperationTimestamp = [opCtx, haveWrappingWriteUnitOfWork] {
+ const bool assignOperationTimestamp = [opCtx, haveWrappingWriteUnitOfWork, mode] {
const auto replMode = ReplicationCoordinator::get(opCtx)->getReplicationMode();
if (opCtx->writesAreReplicated()) {
// We do not assign timestamps on replicated writes since they will get their oplog
@@ -1163,8 +1163,9 @@ Status applyOperation_inlock(OperationContext* opCtx,
break;
}
case ReplicationCoordinator::modeNone: {
- // We do not assign timestamps on standalones.
- return false;
+ // Only assign timestamps on standalones during replication recovery when
+ // started with 'recoverFromOplogAsStandalone'.
+ return mode == OplogApplication::Mode::kRecovering;
}
}
}
@@ -1553,7 +1554,7 @@ Status applyCommand_inlock(OperationContext* opCtx,
}
}
- const bool assignCommandTimestamp = [opCtx] {
+ const bool assignCommandTimestamp = [opCtx, mode] {
const auto replMode = ReplicationCoordinator::get(opCtx)->getReplicationMode();
if (opCtx->writesAreReplicated()) {
// We do not assign timestamps on replicated writes since they will get their oplog
@@ -1570,8 +1571,9 @@ Status applyCommand_inlock(OperationContext* opCtx,
return true;
}
case ReplicationCoordinator::modeNone: {
- // We do not assign timestamps on standalones.
- return false;
+ // Only assign timestamps on standalones during replication recovery when
+ // started with 'recoverFromOplogAsStandalone'.
+ return mode == OplogApplication::Mode::kRecovering;
}
}
MONGO_UNREACHABLE;
diff --git a/src/mongo/db/repl/repl_settings.cpp b/src/mongo/db/repl/repl_settings.cpp
index 3fb69df5d3d..bf64b2bf057 100644
--- a/src/mongo/db/repl/repl_settings.cpp
+++ b/src/mongo/db/repl/repl_settings.cpp
@@ -60,6 +60,10 @@ std::string ReplSettings::getReplSetString() const {
return _replSetString;
}
+bool ReplSettings::getShouldRecoverFromOplogAsStandalone() const {
+ return _shouldRecoverFromOplogAsStandalone;
+}
+
ReplSettings::IndexPrefetchConfig ReplSettings::getPrefetchIndexMode() const {
return _prefetchIndexMode;
}
@@ -80,6 +84,10 @@ void ReplSettings::setReplSetString(std::string replSetString) {
_replSetString = replSetString;
}
+void ReplSettings::setShouldRecoverFromOplogAsStandalone(bool shouldRecover) {
+ _shouldRecoverFromOplogAsStandalone = shouldRecover;
+}
+
void ReplSettings::setPrefetchIndexMode(std::string prefetchIndexModeString) {
if (prefetchIndexModeString.empty()) {
_prefetchIndexMode = IndexPrefetchConfig::UNINITIALIZED;
diff --git a/src/mongo/db/repl/repl_settings.h b/src/mongo/db/repl/repl_settings.h
index 1a74414dade..9f75c10d712 100644
--- a/src/mongo/db/repl/repl_settings.h
+++ b/src/mongo/db/repl/repl_settings.h
@@ -56,6 +56,7 @@ public:
*/
long long getOplogSizeBytes() const;
std::string getReplSetString() const;
+ bool getShouldRecoverFromOplogAsStandalone() const;
/**
* Note: _prefetchIndexMode is initialized to UNINITIALIZED by default.
@@ -75,12 +76,15 @@ public:
void setOplogSizeBytes(long long oplogSizeBytes);
void setReplSetString(std::string replSetString);
void setPrefetchIndexMode(std::string prefetchIndexModeString);
+ void setShouldRecoverFromOplogAsStandalone(bool shouldRecover);
private:
long long _oplogSizeBytes = 0; // --oplogSize
std::string _replSetString; // --replSet[/<seedlist>]
+ bool _shouldRecoverFromOplogAsStandalone = false; // --shouldRecoverFromOplogAsStandalone
+
// --indexPrefetch
IndexPrefetchConfig _prefetchIndexMode = IndexPrefetchConfig::UNINITIALIZED;
};
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index b52d9e05b37..6922a0f51ef 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -125,7 +125,6 @@ MONGO_INITIALIZER(periodicNoopIntervalSecs)(InitializerContext*) {
return Status::OK();
}
-
/**
* Allows non-local writes despite _canAcceptNonlocalWrites being false on a single OperationContext
* while in scope.
@@ -738,11 +737,34 @@ void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
void ReplicationCoordinatorImpl::startup(OperationContext* opCtx) {
if (!isReplEnabled()) {
+ if (_settings.getShouldRecoverFromOplogAsStandalone()) {
+ if (!_storage->supportsRecoverToStableTimestamp(opCtx->getServiceContext())) {
+ severe() << "Cannot use 'recoverFromOplogAsStandalone' with a storage engine that "
+ "does not support recover to stable timestamp.";
+ fassertFailedNoTrace(50805);
+ }
+ auto recoveryTS = _storage->getRecoveryTimestamp(opCtx->getServiceContext());
+ if (!recoveryTS || recoveryTS->isNull()) {
+ severe()
+ << "Cannot use 'recoverFromOplogAsStandalone' without a stable checkpoint.";
+ fassertFailedNoTrace(50806);
+ }
+
+ // We pass in "none" for the stable timestamp so that recoverFromOplog asks storage
+ // for the recoveryTimestamp just like on replica set recovery.
+ const auto stableTimestamp = boost::none;
+ _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp);
+ warning() << "Setting mongod to readOnly mode as a result of specifying "
+ "'recoverFromOplogAsStandalone'.";
+ storageGlobalParams.readOnly = true;
+ }
+
stdx::lock_guard<stdx::mutex> lk(_mutex);
_setConfigState_inlock(kConfigReplicationDisabled);
return;
}
invariant(_settings.usingReplSets());
+ invariant(!_settings.getShouldRecoverFromOplogAsStandalone());
{
stdx::lock_guard<stdx::mutex> lk(_mutex);
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index e8f010f2b11..24bc1c8d60b 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -84,7 +84,7 @@ void ReplicationRecoveryImpl::recoverFromOplog(OperationContext* opCtx,
topOfOplogSW.getStatus() == ErrorCodes::NamespaceNotFound) {
// Oplog is empty. There are no oplog entries to apply, so we exit recovery and go into
// initial sync.
- log() << "No oplog entries to apply for recovery. Oplog is empty. Entering initial sync.";
+ log() << "No oplog entries to apply for recovery. Oplog is empty.";
return;
}
fassert(40290, topOfOplogSW);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
index 938b49876c4..c98617bdc78 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_index.cpp
@@ -222,8 +222,9 @@ StatusWith<std::string> WiredTigerIndex::generateCreateString(const std::string&
<< "formatVersion=" << keyStringVersion << ',' << "infoObj=" << desc.infoObj().jsonString()
<< "),";
- if (WiredTigerUtil::useTableLogging(NamespaceString(desc.parentNS()),
- getGlobalReplSettings().usingReplSets())) {
+ bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+ getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
+ if (WiredTigerUtil::useTableLogging(NamespaceString(desc.parentNS()), replicatedWrites)) {
ss << "log=(enabled=true)";
} else {
ss << "log=(enabled=false)";
@@ -272,11 +273,12 @@ WiredTigerIndex::WiredTigerIndex(OperationContext* ctx,
version.getValue() == kKeyStringV1Version ? KeyString::Version::V1 : KeyString::Version::V0;
if (!isReadOnly) {
+ bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+ getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
uassertStatusOK(WiredTigerUtil::setTableLogging(
ctx,
uri,
- WiredTigerUtil::useTableLogging(NamespaceString(desc->parentNS()),
- getGlobalReplSettings().usingReplSets())));
+ WiredTigerUtil::useTableLogging(NamespaceString(desc->parentNS()), replicatedWrites)));
}
}
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index 717adb8c4bd..bbefc5c9b11 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -595,8 +595,9 @@ StatusWith<std::string> WiredTigerRecordStore::generateCreateString(
}
ss << ")";
- if (WiredTigerUtil::useTableLogging(NamespaceString(ns),
- getGlobalReplSettings().usingReplSets())) {
+ bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+ getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
+ if (WiredTigerUtil::useTableLogging(NamespaceString(ns), replicatedWrites)) {
ss << ",log=(enabled=true)";
} else {
ss << ",log=(enabled=false)";
@@ -647,11 +648,10 @@ WiredTigerRecordStore::WiredTigerRecordStore(WiredTigerKVEngine* kvEngine,
}
if (!params.isReadOnly) {
+ bool replicatedWrites = getGlobalReplSettings().usingReplSets() ||
+ getGlobalReplSettings().getShouldRecoverFromOplogAsStandalone();
uassertStatusOK(WiredTigerUtil::setTableLogging(
- ctx,
- _uri,
- WiredTigerUtil::useTableLogging(NamespaceString(ns()),
- getGlobalReplSettings().usingReplSets())));
+ ctx, _uri, WiredTigerUtil::useTableLogging(NamespaceString(ns()), replicatedWrites)));
}
if (_isOplog) {