summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJudah Schvimer <judah@mongodb.com>2016-08-15 15:47:22 -0400
committerJudah Schvimer <judah@mongodb.com>2016-08-15 15:48:08 -0400
commit10ff598752d57650783c63166180df31f907df12 (patch)
tree2a07c948cdb8ec81491ce40b85652422c034dc89
parent5a728c7a8d99d4efe06fd7c19a1b9d82879ee49e (diff)
downloadmongo-10ff598752d57650783c63166180df31f907df12.tar.gz
SERVER-25125 Report initial sync progress in ReplSetGetStatus
-rw-r--r--jstests/noPassthroughWithMongod/initial_sync_replSetGetStatus.js109
-rw-r--r--src/mongo/db/repl/collection_cloner.cpp25
-rw-r--r--src/mongo/db/repl/collection_cloner.h2
-rw-r--r--src/mongo/db/repl/data_replicator.cpp123
-rw-r--r--src/mongo/db/repl/data_replicator.h31
-rw-r--r--src/mongo/db/repl/data_replicator_test.cpp196
-rw-r--r--src/mongo/db/repl/database_cloner.cpp44
-rw-r--r--src/mongo/db/repl/database_cloner.h5
-rw-r--r--src/mongo/db/repl/databases_cloner.cpp44
-rw-r--r--src/mongo/db/repl/databases_cloner.h18
-rw-r--r--src/mongo/db/repl/initial_sync_state.h1
-rw-r--r--src/mongo/db/repl/replication_coordinator.h8
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp15
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h5
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp3
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp3
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h2
-rw-r--r--src/mongo/db/repl/replset_commands.cpp13
-rw-r--r--src/mongo/db/repl/topology_coordinator.h1
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl.cpp4
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl_test.cpp19
-rw-r--r--src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp20
22 files changed, 621 insertions, 70 deletions
diff --git a/jstests/noPassthroughWithMongod/initial_sync_replSetGetStatus.js b/jstests/noPassthroughWithMongod/initial_sync_replSetGetStatus.js
new file mode 100644
index 00000000000..df5600dc5a6
--- /dev/null
+++ b/jstests/noPassthroughWithMongod/initial_sync_replSetGetStatus.js
@@ -0,0 +1,109 @@
+/**
+ * This test tests that replSetGetStatus returns initial sync stats while initial sync is in
+ * progress.
+ */
+
+(function() {
+ "use strict";
+ // If the parameter is already set, don't run this test.
+ var parameters = db.adminCommand({getCmdLineOpts: 1}).parsed.setParameter;
+ if (parameters.use3dot2InitialSync || parameters.initialSyncOplogBuffer) {
+ jsTest.log("Skipping initial_sync_parameters.js because use3dot2InitialSync or " +
+ "initialSyncOplogBuffer was already provided.");
+ return;
+ }
+
+ var name = 'initial_sync_replSetGetStatus';
+ var replSet = new ReplSetTest({
+ name: name,
+ nodes: 1,
+ });
+
+ replSet.startSet();
+ replSet.initiate();
+ var primary = replSet.getPrimary();
+
+ var coll = primary.getDB('test').foo;
+ assert.writeOK(coll.insert({a: 1}));
+ assert.writeOK(coll.insert({a: 2}));
+
+ // Add a secondary node but make it hang before copying databases.
+ var secondary = replSet.add(
+ {setParameter: {use3dot2InitialSync: false, initialSyncOplogBuffer: "collection"}});
+ secondary.setSlaveOk();
+
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'alwaysOn'}));
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeFinish', mode: 'alwaysOn'}));
+ replSet.reInitiate();
+
+ // Wait for fail point message to be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000);
+ };
+
+ // Wait for initial sync to pause before it copies the databases.
+ checkLog(secondary, 'initial sync - initialSyncHangBeforeCopyingDatabases fail point enabled');
+
+ // Test that replSetGetStatus returns the correct results while initial sync is in progress.
+ var res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
+ assert(!res.initialSyncStatus,
+ "Response should not have an 'initialSyncStatus' field: " + tojson(res));
+
+ res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 1}));
+ assert(res.initialSyncStatus,
+ "Response should have an 'initialSyncStatus' field: " + tojson(res));
+
+ assert.commandFailed(secondary.adminCommand({replSetGetStatus: 1, initialSync: "t"}),
+ ErrorCodes.TypeMismatch);
+
+ assert.writeOK(coll.insert({a: 3}));
+ assert.writeOK(coll.insert({a: 4}));
+
+ // Let initial sync continue working.
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeCopyingDatabases', mode: 'off'}));
+
+ // Wait for initial sync to pause right before it finishes.
+ checkLog(secondary, 'initial sync - initialSyncHangBeforeFinish fail point enabled');
+
+ // Test that replSetGetStatus returns the correct results when initial sync is at the very end.
+ res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 1}));
+ assert(res.initialSyncStatus, "Response should have an 'initialSyncStatus' field.");
+ assert.eq(res.initialSyncStatus.fetchedMissingDocs, 0);
+ assert.eq(res.initialSyncStatus.appliedOps, 2);
+ assert.eq(res.initialSyncStatus.failedInitialSyncAttempts, 0);
+ assert.eq(res.initialSyncStatus.maxFailedInitialSyncAttempts, 10);
+ assert.eq(res.initialSyncStatus.databases.databasesCloned, 1);
+ assert.eq(res.initialSyncStatus.databases.test.collections, 1);
+ assert.eq(res.initialSyncStatus.databases.test.clonedCollections, 1);
+ assert.eq(res.initialSyncStatus.databases.test["test.foo"].documents, 4);
+ assert.eq(res.initialSyncStatus.databases.test["test.foo"].indexes, 1);
+ assert.eq(res.initialSyncStatus.databases.test["test.foo"].fetchedBatches, 1);
+
+ // Let initial sync finish and get into secondary state.
+ assert.commandWorked(secondary.getDB('admin').runCommand(
+ {configureFailPoint: 'initialSyncHangBeforeFinish', mode: 'off'}));
+ replSet.awaitSecondaryNodes(60 * 1000);
+
+ // Test that replSetGetStatus returns the correct results after initial sync is finished.
+ res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1}));
+ assert(!res.initialSyncStatus,
+ "Response should not have an 'initialSyncStatus' field: " + tojson(res));
+
+ res = assert.commandWorked(secondary.adminCommand({replSetGetStatus: 1, initialSync: 1}));
+ assert(!res.initialSyncStatus,
+ "Response should not have an 'initialSyncStatus' field: " + tojson(res));
+
+ assert.commandFailedWithCode(secondary.adminCommand({replSetGetStatus: 1, initialSync: "m"}),
+ ErrorCodes.TypeMismatch);
+})();
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index a5bab29f76d..fa85b2b2fcc 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -128,6 +128,7 @@ CollectionCloner::CollectionCloner(executor::TaskExecutor* executor,
uassertStatusOK(options.validate());
uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
uassert(ErrorCodes::BadValue, "storage interface cannot be null", storageInterface);
+ _stats.ns = _sourceNss.ns();
}
CollectionCloner::~CollectionCloner() {
@@ -435,16 +436,24 @@ std::string CollectionCloner::Stats::toString() const {
BSONObj CollectionCloner::Stats::toBSON() const {
BSONObjBuilder bob;
- bob.appendNumber("documents", documents);
- bob.appendNumber("indexes", indexes);
- bob.appendNumber("fetchedBatches", fetchBatches);
- bob.appendDate("start", start);
- bob.appendDate("end", end);
- auto elapsed = end - start;
- long long elapsedMillis = duration_cast<Milliseconds>(elapsed).count();
- bob.appendNumber("elapsedMillis", elapsedMillis);
+ bob.append("ns", ns);
+ append(&bob);
return bob.obj();
}
+void CollectionCloner::Stats::append(BSONObjBuilder* builder) const {
+ builder->appendNumber("documents", documents);
+ builder->appendNumber("indexes", indexes);
+ builder->appendNumber("fetchedBatches", fetchBatches);
+ if (start != Date_t()) {
+ builder->appendDate("start", start);
+ if (end != Date_t()) {
+ builder->appendDate("end", end);
+ auto elapsed = end - start;
+ long long elapsedMillis = duration_cast<Milliseconds>(elapsed).count();
+ builder->appendNumber("elapsedMillis", elapsedMillis);
+ }
+ }
+}
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/collection_cloner.h b/src/mongo/db/repl/collection_cloner.h
index 02c7d872ed9..0f9c0bc103a 100644
--- a/src/mongo/db/repl/collection_cloner.h
+++ b/src/mongo/db/repl/collection_cloner.h
@@ -61,6 +61,7 @@ class CollectionCloner : public BaseCloner {
public:
struct Stats {
+ std::string ns;
Date_t start;
Date_t end;
size_t documents{0};
@@ -69,6 +70,7 @@ public:
std::string toString() const;
BSONObj toBSON() const;
+ void append(BSONObjBuilder* builder) const;
};
/**
* Type of function to schedule storage interface tasks with the executor.
diff --git a/src/mongo/db/repl/data_replicator.cpp b/src/mongo/db/repl/data_replicator.cpp
index d3052f667c1..d3b999521ac 100644
--- a/src/mongo/db/repl/data_replicator.cpp
+++ b/src/mongo/db/repl/data_replicator.cpp
@@ -60,7 +60,6 @@
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/time_support.h"
-#include "mongo/util/timer.h"
namespace mongo {
namespace repl {
@@ -341,6 +340,32 @@ std::string DataReplicator::getDiagnosticString() const {
return out;
}
+BSONObj DataReplicator::getInitialSyncProgress() const {
+ LockGuard lk(_mutex);
+ return _getInitialSyncProgress_inlock();
+}
+
+BSONObj DataReplicator::_getInitialSyncProgress_inlock() const {
+ BSONObjBuilder bob;
+ try {
+ _stats.append(&bob);
+ if (_initialSyncState) {
+ bob.appendNumber("fetchedMissingDocs", _initialSyncState->fetchedMissingDocs);
+ bob.appendNumber("appliedOps", _initialSyncState->appliedOps);
+ if (_initialSyncState->dbsCloner) {
+ BSONObjBuilder dbsBuilder(bob.subobjStart("databases"));
+ _initialSyncState->dbsCloner->getStats().append(&dbsBuilder);
+ dbsBuilder.doneFast();
+ }
+ }
+ } catch (const DBException& e) {
+ bob.resetToEmpty();
+ bob.append("error", e.toString());
+ log() << "Error creating initial sync progress object: " << e.toString();
+ }
+ return bob.obj();
+}
+
Status DataReplicator::resume(bool wait) {
CBHStatus handle = _exec->scheduleWork(
stdx::bind(&DataReplicator::_resumeFinish, this, stdx::placeholders::_1));
@@ -622,13 +647,13 @@ Status DataReplicator::_runInitialSyncAttempt_inlock(OperationContext* txn,
StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
std::size_t maxRetries) {
- Timer t;
if (!txn) {
std::string msg = "Initial Sync attempted but no OperationContext*, so aborting.";
error() << msg;
return Status{ErrorCodes::InitialSyncFailure, msg};
}
UniqueLock lk(_mutex);
+ _stats.initialSyncStart = _exec->now();
if (_state != DataReplicatorState::Uninitialized) {
if (_state == DataReplicatorState::InitialSync)
return {ErrorCodes::InitialSyncActive,
@@ -663,9 +688,9 @@ StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
_storage->setInitialSyncFlag(txn);
lk.lock();
- const auto maxFailedAttempts = maxRetries + 1;
- std::size_t failedAttempts = 0;
- while (failedAttempts < maxFailedAttempts) {
+ _stats.maxFailedInitialSyncAttempts = maxRetries + 1;
+ _stats.failedInitialSyncAttempts = 0;
+ while (_stats.failedInitialSyncAttempts < _stats.maxFailedInitialSyncAttempts) {
Status attemptErrorStatus(Status::OK());
_setState_inlock(DataReplicatorState::InitialSync);
_reporterPaused = true;
@@ -704,6 +729,27 @@ StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
}
}
+ auto runTime = _initialSyncState ? _initialSyncState->timer.millis() : 0;
+ _stats.initialSyncAttemptInfos.emplace_back(
+ DataReplicator::InitialSyncAttemptInfo{runTime, attemptErrorStatus, _syncSource});
+
+ // If the status is ok now then initial sync is over. We must do this before we reset
+ // _initialSyncState and lose the DatabasesCloner's stats.
+ if (attemptErrorStatus.isOK()) {
+ _stats.initialSyncEnd = _exec->now();
+ log() << "Initial Sync Statistics: " << _getInitialSyncProgress_inlock();
+ if (MONGO_FAIL_POINT(initialSyncHangBeforeFinish)) {
+ lk.unlock();
+ // This log output is used in js tests so please leave it.
+ log() << "initial sync - initialSyncHangBeforeFinish fail point "
+ "enabled. Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(initialSyncHangBeforeFinish)) {
+ mongo::sleepsecs(1);
+ }
+ lk.lock();
+ }
+ }
+
// Cleanup
_cancelAllHandles_inlock();
_waitOnAndResetAll_inlock(&lk);
@@ -713,19 +759,22 @@ StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
break;
}
- ++failedAttempts;
+ ++_stats.failedInitialSyncAttempts;
error() << "Initial sync attempt failed -- attempts left: "
- << (maxFailedAttempts - failedAttempts) << " cause: " << attemptErrorStatus;
+ << (_stats.maxFailedInitialSyncAttempts - _stats.failedInitialSyncAttempts)
+ << " cause: " << attemptErrorStatus;
// Check if need to do more retries.
- if (failedAttempts >= maxFailedAttempts) {
+ if (_stats.failedInitialSyncAttempts >= _stats.maxFailedInitialSyncAttempts) {
const std::string err =
"The maximum number of retries"
" have been exhausted for initial sync.";
severe() << err;
_setState_inlock(DataReplicatorState::Uninitialized);
+ _stats.initialSyncEnd = _exec->now();
+ log() << "Initial Sync Statistics: " << _getInitialSyncProgress_inlock();
return attemptErrorStatus;
}
@@ -735,16 +784,6 @@ StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
lk.lock();
}
- if (MONGO_FAIL_POINT(initialSyncHangBeforeFinish)) {
- lk.unlock();
- // This log output is used in js tests so please leave it.
- log() << "initial sync - initialSyncHangBeforeFinish fail point "
- "enabled. Blocking until fail point is disabled.";
- while (MONGO_FAIL_POINT(initialSyncHangBeforeFinish)) {
- mongo::sleepsecs(1);
- }
- lk.lock();
- }
_reporterPaused = false;
_fetcherPaused = false;
_applierPaused = false;
@@ -755,7 +794,8 @@ StatusWith<OpTimeWithHash> DataReplicator::doInitialSync(OperationContext* txn,
_storage->clearInitialSyncFlag(txn);
_storage->setMinValid(txn, _lastApplied.opTime, DurableRequirement::Strong);
_opts.setMyLastOptime(_lastApplied.opTime);
- log() << "initial sync done; took " << t.millis() << " milliseconds.";
+ log() << "initial sync done; took " << _stats.initialSyncEnd - _stats.initialSyncStart
+ << " milliseconds.";
return _lastApplied;
}
@@ -1452,5 +1492,50 @@ void DataReplicator::_rollbackOperations(const CallbackArgs& cbData) {
_doNextActions();
};
+std::string DataReplicator::Stats::toString() const {
+ return toBSON().toString();
+}
+
+BSONObj DataReplicator::Stats::toBSON() const {
+ BSONObjBuilder bob;
+ append(&bob);
+ return bob.obj();
+}
+
+void DataReplicator::Stats::append(BSONObjBuilder* builder) const {
+ builder->appendNumber("failedInitialSyncAttempts", failedInitialSyncAttempts);
+ builder->appendNumber("maxFailedInitialSyncAttempts", maxFailedInitialSyncAttempts);
+ if (initialSyncStart != Date_t()) {
+ builder->appendDate("initialSyncStart", initialSyncStart);
+ if (initialSyncEnd != Date_t()) {
+ builder->appendDate("initialSyncEnd", initialSyncEnd);
+ auto elapsed = initialSyncEnd - initialSyncStart;
+ long long elapsedMillis = duration_cast<Milliseconds>(elapsed).count();
+ builder->appendNumber("initialSyncElapsedMillis", elapsedMillis);
+ }
+ }
+ BSONArrayBuilder arrBuilder(builder->subarrayStart("initialSyncAttempts"));
+ for (unsigned int i = 0; i < initialSyncAttemptInfos.size(); ++i) {
+ arrBuilder.append(initialSyncAttemptInfos[i].toBSON());
+ }
+ arrBuilder.doneFast();
+}
+
+std::string DataReplicator::InitialSyncAttemptInfo::toString() const {
+ return toBSON().toString();
+}
+
+BSONObj DataReplicator::InitialSyncAttemptInfo::toBSON() const {
+ BSONObjBuilder bob;
+ append(&bob);
+ return bob.obj();
+}
+
+void DataReplicator::InitialSyncAttemptInfo::append(BSONObjBuilder* builder) const {
+ builder->appendNumber("durationMillis", durationMillis);
+ builder->append("status", status.toString());
+ builder->append("syncSource", syncSource.toString());
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/data_replicator.h b/src/mongo/db/repl/data_replicator.h
index 568a5d23944..17e2bc0e110 100644
--- a/src/mongo/db/repl/data_replicator.h
+++ b/src/mongo/db/repl/data_replicator.h
@@ -178,6 +178,28 @@ struct DataReplicatorOptions {
*/
class DataReplicator {
public:
+ struct InitialSyncAttemptInfo {
+ int durationMillis;
+ Status status;
+ HostAndPort syncSource;
+
+ std::string toString() const;
+ BSONObj toBSON() const;
+ void append(BSONObjBuilder* builder) const;
+ };
+
+ struct Stats {
+ size_t failedInitialSyncAttempts;
+ size_t maxFailedInitialSyncAttempts;
+ Date_t initialSyncStart;
+ Date_t initialSyncEnd;
+ std::vector<DataReplicator::InitialSyncAttemptInfo> initialSyncAttemptInfos;
+
+ std::string toString() const;
+ BSONObj toBSON() const;
+ void append(BSONObjBuilder* builder) const;
+ };
+
DataReplicator(DataReplicatorOptions opts,
std::unique_ptr<DataReplicatorExternalState> dataReplicatorExternalState,
StorageInterface* storage);
@@ -242,6 +264,12 @@ public:
std::string getDiagnosticString() const;
+ /**
+ * Returns stats about the progress of initial sync. If initial sync is not in progress it
+ * returns summary statistics for what occurred during initial sync.
+ */
+ BSONObj getInitialSyncProgress() const;
+
// For testing only
void _resetState_inlock(OperationContext* txn, OpTimeWithHash lastAppliedOpTime);
@@ -283,6 +311,8 @@ private:
void _doNextActions_Rollback_inlock();
void _doNextActions_Steady_inlock();
+ BSONObj _getInitialSyncProgress_inlock() const;
+
// Applies up till the specified Timestamp and pauses automatic application
Timestamp _applyUntilAndPause(Timestamp);
Timestamp _applyUntil(Timestamp);
@@ -348,6 +378,7 @@ private:
Event _onShutdown; // (M)
Timestamp _rollbackCommonOptime; // (MX)
CollectionCloner::ScheduleDbWorkFn _scheduleDbWorkFn; // (M)
+ Stats _stats; // (M)
};
} // namespace repl
diff --git a/src/mongo/db/repl/data_replicator_test.cpp b/src/mongo/db/repl/data_replicator_test.cpp
index 1625f864db7..6d50bb97613 100644
--- a/src/mongo/db/repl/data_replicator_test.cpp
+++ b/src/mongo/db/repl/data_replicator_test.cpp
@@ -464,6 +464,10 @@ public:
_condVar.wait(lk);
}
+ BSONObj getInitialSyncProgress() {
+ return _dr->getInitialSyncProgress();
+ }
+
private:
void _run() {
setThreadName("InitialSyncRunner");
@@ -671,6 +675,10 @@ protected:
ASSERT_EQ(_isbr->getResult(net).getStatus().code(), code) << "status codes differ";
}
+ BSONObj getInitialSyncProgress() {
+ return _isbr->getInitialSyncProgress();
+ }
+
// Generate at least one getMore response.
std::size_t numGetMoreOplogEntries = 0;
std::size_t numGetMoreOplogEntriesMax = 1;
@@ -1237,6 +1245,194 @@ TEST_F(InitialSyncTest, InitialSyncStateIsResetAfterFailure) {
verifySync(getNet(), ErrorCodes::UnrecoverableRollbackError);
}
+TEST_F(InitialSyncTest, GetInitialSyncProgressReturnsCorrectProgress) {
+ const Responses failedResponses = {
+ {"replSetGetRBID", fromjson(str::stream() << "{ok: 1, rbid:1}")},
+ // get latest oplog ts
+ {"find",
+ fromjson(
+ str::stream() << "{ok:1, cursor:{id:NumberLong(0), ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:NumberLong(1), ns:'a.a', v:"
+ << OplogEntry::kOplogVersion
+ << ", op:'i', o:{_id:1, a:1}}]}}")},
+ // oplog fetcher find
+ {"find",
+ fromjson(
+ str::stream() << "{ok:1, cursor:{id:NumberLong(1), ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:NumberLong(1), ns:'a.a', v:"
+ << OplogEntry::kOplogVersion
+ << ", op:'i', o:{_id:1, a:1}}]}}")},
+ // Clone Start
+ // listDatabases
+ {"listDatabases",
+ fromjson("{ok:0, errmsg:'fail on clone -- listDBs injected failure', code:9}")},
+ // rollback checker.
+ {"replSetGetRBID", fromjson(str::stream() << "{ok: 1, rbid:1}")},
+ };
+
+ const Responses successfulResponses =
+ {
+ {"replSetGetRBID", fromjson(str::stream() << "{ok: 1, rbid:1}")},
+ // get latest oplog ts
+ {"find",
+ fromjson(str::stream()
+ << "{ok:1, cursor:{id:NumberLong(0), ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:NumberLong(1), ns:'a.a', v:"
+ << OplogEntry::kOplogVersion
+ << ", op:'i', o:{_id:1, a:1}}]}}")},
+ // oplog fetcher find
+ {"find",
+ fromjson(str::stream()
+ << "{ok:1, cursor:{id:NumberLong(1), ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(1,1), h:NumberLong(1), ns:'a.a', v:"
+ << OplogEntry::kOplogVersion
+ << ", op:'i', o:{_id:1, a:1}}]}}")},
+ // Clone Start
+ // listDatabases
+ {"listDatabases", fromjson("{ok:1, databases:[{name:'a'}]}")},
+ // listCollections for "a"
+ {"listCollections",
+ fromjson("{ok:1, cursor:{id:NumberLong(0), ns:'a.$cmd.listCollections', firstBatch:["
+ "{name:'a', options:{}} "
+ "]}}")},
+ // listIndexes:a
+ {"listIndexes",
+ fromjson(str::stream()
+ << "{ok:1, cursor:{id:NumberLong(0), ns:'a.$cmd.listIndexes.a', firstBatch:["
+ "{v:"
+ << OplogEntry::kOplogVersion
+ << ", key:{_id:1}, name:'_id_', ns:'a.a'}]}}")},
+ // find:a
+ {"find",
+ fromjson("{ok:1, cursor:{id:NumberLong(0), ns:'a.a', firstBatch:["
+ "{_id:1, a:1} "
+ "]}}")},
+ // Clone Done
+ // get latest oplog ts
+ {"find",
+ fromjson(str::stream()
+ << "{ok:1, cursor:{id:NumberLong(0), ns:'local.oplog.rs', firstBatch:["
+ "{ts:Timestamp(7,1), h:NumberLong(1), ns:'a.a', v:"
+ << OplogEntry::kOplogVersion
+ << ", op:'i', o:{_id:5, a:2}}]}}")},
+ {"replSetGetRBID", fromjson(str::stream() << "{ok: 1, rbid:1}")},
+ // Applier starts ...
+ };
+
+ startSync(1);
+
+ BSONObj progress = getInitialSyncProgress();
+ ASSERT_EQUALS(progress.nFields(), 4);
+ ASSERT_EQUALS(progress.getIntField("failedInitialSyncAttempts"), 0);
+ ASSERT_EQUALS(progress.getIntField("maxFailedInitialSyncAttempts"), 2);
+ ASSERT_EQUALS(progress["initialSyncStart"].type(), Date);
+ ASSERT_EQUALS(progress.getObjectField("initialSyncAttempts"), BSONObj());
+
+ // Play first response to ensure data replicator has entered initial sync state.
+ setResponses({failedResponses.begin(), failedResponses.begin() + 1});
+ numGetMoreOplogEntriesMax = 7;
+ playResponses(false);
+ log() << "Done playing first failed response";
+
+ progress = getInitialSyncProgress();
+ ASSERT_EQUALS(progress.nFields(), 7);
+ ASSERT_EQUALS(progress.getIntField("failedInitialSyncAttempts"), 0);
+ ASSERT_EQUALS(progress.getIntField("maxFailedInitialSyncAttempts"), 2);
+ ASSERT_EQUALS(progress["initialSyncStart"].type(), Date);
+ ASSERT_EQUALS(progress.getObjectField("initialSyncAttempts"), BSONObj());
+ ASSERT_EQUALS(progress.getIntField("fetchedMissingDocs"), 0);
+ ASSERT_EQUALS(progress.getIntField("appliedOps"), 0);
+ ASSERT_EQUALS(progress.getObjectField("databases"), BSON("databasesCloned" << 0));
+
+ // Play rest of the failed round of responses.
+ setResponses({failedResponses.begin() + 1, failedResponses.end()});
+ playResponses(false);
+ log() << "Done playing failed responses";
+
+ // Play the first response of the successful round of responses.
+ setResponses({successfulResponses.begin(), successfulResponses.begin() + 1});
+ playResponses(false);
+ log() << "Done playing first successful response";
+
+ progress = getInitialSyncProgress();
+ ASSERT_EQUALS(progress.nFields(), 7);
+ ASSERT_EQUALS(progress.getIntField("failedInitialSyncAttempts"), 1);
+ ASSERT_EQUALS(progress.getIntField("maxFailedInitialSyncAttempts"), 2);
+ ASSERT_EQUALS(progress["initialSyncStart"].type(), Date);
+ ASSERT_EQUALS(progress.getIntField("fetchedMissingDocs"), 0);
+ ASSERT_EQUALS(progress.getIntField("appliedOps"), 0);
+ ASSERT_EQUALS(progress.getObjectField("databases"), BSON("databasesCloned" << 0));
+
+ BSONObj attempts = progress["initialSyncAttempts"].Obj();
+ ASSERT_EQUALS(attempts.nFields(), 1);
+ BSONObj attempt0 = attempts["0"].Obj();
+ ASSERT_EQUALS(attempt0.nFields(), 3);
+ ASSERT_EQUALS(attempt0.getStringField("status"),
+ std::string("FailedToParse: fail on clone -- listDBs injected failure"));
+ ASSERT_EQUALS(attempt0["durationMillis"].type(), NumberInt);
+ ASSERT_EQUALS(attempt0.getStringField("syncSource"), std::string("localhost:27017"));
+
+ // Play all but last of the successful round of responses.
+ setResponses({successfulResponses.begin() + 1, successfulResponses.end() - 1});
+ playResponses(false);
+ log() << "Done playing all but last successful response";
+
+ progress = getInitialSyncProgress();
+ ASSERT_EQUALS(progress.nFields(), 7);
+ ASSERT_EQUALS(progress.getIntField("failedInitialSyncAttempts"), 1);
+ ASSERT_EQUALS(progress.getIntField("maxFailedInitialSyncAttempts"), 2);
+ ASSERT_EQUALS(progress["initialSyncStart"].type(), Date);
+ ASSERT_EQUALS(progress.getIntField("fetchedMissingDocs"), 0);
+ ASSERT_EQUALS(progress.getIntField("appliedOps"), 4);
+ ASSERT_EQUALS(progress.getObjectField("databases"),
+ fromjson(str::stream() << "{databasesCloned: 1, a: {collections: 1, "
+ "clonedCollections: 1, start: new Date(1406851200000), "
+ "end: new Date(1406851200000), elapsedMillis: 0, "
+ "'a.a': {documents: 1, indexes: 1, fetchedBatches: 1, "
+ "start: new Date(1406851200000), end: new "
+ "Date(1406851200000), elapsedMillis: 0}}}"));
+
+ attempts = progress["initialSyncAttempts"].Obj();
+ ASSERT_EQUALS(attempts.nFields(), 1);
+ attempt0 = attempts["0"].Obj();
+ ASSERT_EQUALS(attempt0.nFields(), 3);
+ ASSERT_EQUALS(attempt0.getStringField("status"),
+ std::string("FailedToParse: fail on clone -- listDBs injected failure"));
+ ASSERT_EQUALS(attempt0["durationMillis"].type(), NumberInt);
+ ASSERT_EQUALS(attempt0.getStringField("syncSource"), std::string("localhost:27017"));
+
+ // Play last successful response.
+ setResponses({successfulResponses.end() - 1, successfulResponses.end()});
+ playResponses(true);
+
+ log() << "waiting for initial sync to verify it completed OK";
+ verifySync(getNet());
+
+ progress = getInitialSyncProgress();
+ ASSERT_EQUALS(progress.nFields(), 6);
+ ASSERT_EQUALS(progress.getIntField("failedInitialSyncAttempts"), 1);
+ ASSERT_EQUALS(progress.getIntField("maxFailedInitialSyncAttempts"), 2);
+ ASSERT_EQUALS(progress["initialSyncStart"].type(), Date);
+ ASSERT_EQUALS(progress["initialSyncEnd"].type(), Date);
+ ASSERT_EQUALS(progress["initialSyncElapsedMillis"].type(), NumberInt);
+
+ attempts = progress["initialSyncAttempts"].Obj();
+ ASSERT_EQUALS(attempts.nFields(), 2);
+
+ attempt0 = attempts["0"].Obj();
+ ASSERT_EQUALS(attempt0.nFields(), 3);
+ ASSERT_EQUALS(attempt0.getStringField("status"),
+ std::string("FailedToParse: fail on clone -- listDBs injected failure"));
+ ASSERT_EQUALS(attempt0["durationMillis"].type(), NumberInt);
+ ASSERT_EQUALS(attempt0.getStringField("syncSource"), std::string("localhost:27017"));
+
+ BSONObj attempt1 = attempts["1"].Obj();
+ ASSERT_EQUALS(attempt1.nFields(), 3);
+ ASSERT_EQUALS(attempt1.getStringField("status"), std::string("OK"));
+ ASSERT_EQUALS(attempt1["durationMillis"].type(), NumberInt);
+ ASSERT_EQUALS(attempt1.getStringField("syncSource"), std::string("localhost:27017"));
+}
+
class TestSyncSourceSelector2 : public SyncSourceSelector {
diff --git a/src/mongo/db/repl/database_cloner.cpp b/src/mongo/db/repl/database_cloner.cpp
index 8888b958fba..baf4eeb4fce 100644
--- a/src/mongo/db/repl/database_cloner.cpp
+++ b/src/mongo/db/repl/database_cloner.cpp
@@ -126,6 +126,8 @@ DatabaseCloner::DatabaseCloner(executor::TaskExecutor* executor,
uassert(ErrorCodes::BadValue, "storage interface cannot be null", si);
uassert(ErrorCodes::BadValue, "collection callback function cannot be null", collWork);
uassert(ErrorCodes::BadValue, "callback function cannot be null", onCompletion);
+
+ _stats.dbname = _dbname;
}
DatabaseCloner::~DatabaseCloner() {
@@ -194,8 +196,12 @@ void DatabaseCloner::shutdown() {
}
DatabaseCloner::Stats DatabaseCloner::getStats() const {
- stdx::unique_lock<stdx::mutex> lk(_mutex);
- return _stats;
+ LockGuard lk(_mutex);
+ DatabaseCloner::Stats stats = _stats;
+ for (auto&& collectionCloner : _collectionCloners) {
+ stats.collectionStats.emplace_back(collectionCloner.getStats());
+ }
+ return stats;
}
void DatabaseCloner::join() {
@@ -234,11 +240,11 @@ void DatabaseCloner::_listCollectionsCallback(const StatusWith<Fetcher::QueryRes
UniqueLock lk(_mutex);
// We may be called with multiple batches leading to a need to grow _collectionInfos.
_collectionInfos.reserve(_collectionInfos.size() + documents.size());
- _stats.collections += documents.size();
std::copy_if(documents.begin(),
documents.end(),
std::back_inserter(_collectionInfos),
_listCollectionsPredicate);
+ _stats.collections += _collectionInfos.size();
// The fetcher will continue to call with kGetMore until an error or the last batch.
if (*nextAction == Fetcher::NextAction::kGetMore) {
@@ -414,6 +420,9 @@ void DatabaseCloner::_finishCallback_inlock(UniqueLock& lk, const Status& status
_finishCallback(status);
}
+std::string DatabaseCloner::getDBName() const {
+ return _dbname;
+}
std::string DatabaseCloner::Stats::toString() const {
return toBSON().toString();
@@ -421,15 +430,30 @@ std::string DatabaseCloner::Stats::toString() const {
BSONObj DatabaseCloner::Stats::toBSON() const {
BSONObjBuilder bob;
- bob.appendNumber("collections", collections);
- bob.appendNumber("clonedCollections", clonedCollections);
- bob.appendDate("start", start);
- bob.appendDate("end", end);
- auto elapsed = end - start;
- long long elapsedMillis = duration_cast<Milliseconds>(elapsed).count();
- bob.appendNumber("elapsedMillis", elapsedMillis);
+ bob.append("dbname", dbname);
+ append(&bob);
return bob.obj();
}
+void DatabaseCloner::Stats::append(BSONObjBuilder* builder) const {
+ builder->appendNumber("collections", collections);
+ builder->appendNumber("clonedCollections", clonedCollections);
+ if (start != Date_t()) {
+ builder->appendDate("start", start);
+ if (end != Date_t()) {
+ builder->appendDate("end", end);
+ auto elapsed = end - start;
+ long long elapsedMillis = duration_cast<Milliseconds>(elapsed).count();
+ builder->appendNumber("elapsedMillis", elapsedMillis);
+ }
+ }
+
+ for (auto&& collection : collectionStats) {
+ BSONObjBuilder collectionBuilder(builder->subobjStart(collection.ns));
+ collection.append(&collectionBuilder);
+ collectionBuilder.doneFast();
+ }
+}
+
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/database_cloner.h b/src/mongo/db/repl/database_cloner.h
index c5f3996b8c2..d89bcea8b3e 100644
--- a/src/mongo/db/repl/database_cloner.h
+++ b/src/mongo/db/repl/database_cloner.h
@@ -62,13 +62,16 @@ class DatabaseCloner : public BaseCloner {
public:
struct Stats {
+ std::string dbname;
Date_t start;
Date_t end;
size_t collections{0};
size_t clonedCollections{0};
+ std::vector<CollectionCloner::Stats> collectionStats;
std::string toString() const;
BSONObj toBSON() const;
+ void append(BSONObjBuilder* builder) const;
};
/**
@@ -138,6 +141,8 @@ public:
DatabaseCloner::Stats getStats() const;
+ std::string getDBName() const;
+
//
// Testing only functions below.
//
diff --git a/src/mongo/db/repl/databases_cloner.cpp b/src/mongo/db/repl/databases_cloner.cpp
index d4823608fee..f92ca3c9215 100644
--- a/src/mongo/db/repl/databases_cloner.cpp
+++ b/src/mongo/db/repl/databases_cloner.cpp
@@ -90,10 +90,11 @@ DatabasesCloner::~DatabasesCloner() {
}
std::string DatabasesCloner::toString() const {
+ LockGuard lk(_mutex);
return str::stream() << "initial sync --"
<< " active:" << _active << " status:" << _status.toString()
<< " source:" << _source.toString()
- << " db cloners completed:" << _currentClonerIndex
+ << " db cloners completed:" << _stats.databasesCloned
<< " db count:" << _databaseCloners.size();
}
@@ -136,6 +137,34 @@ Status DatabasesCloner::getStatus() {
return _status;
}
+DatabasesCloner::Stats DatabasesCloner::getStats() const {
+ LockGuard lk(_mutex);
+ DatabasesCloner::Stats stats = _stats;
+ for (auto&& databaseCloner : _databaseCloners) {
+ stats.databaseStats.emplace_back(databaseCloner->getStats());
+ }
+ return stats;
+}
+
+std::string DatabasesCloner::Stats::toString() const {
+ return toBSON().toString();
+}
+
+BSONObj DatabasesCloner::Stats::toBSON() const {
+ BSONObjBuilder bob;
+ append(&bob);
+ return bob.obj();
+}
+
+void DatabasesCloner::Stats::append(BSONObjBuilder* builder) const {
+ builder->appendNumber("databasesCloned", databasesCloned);
+ for (auto&& db : databaseStats) {
+ BSONObjBuilder dbBuilder(builder->subobjStart(db.dbname));
+ db.append(&dbBuilder);
+ dbBuilder.doneFast();
+ }
+}
+
Status DatabasesCloner::startup() {
UniqueLock lk(_mutex);
invariant(!_active);
@@ -291,7 +320,7 @@ RemoteCommandRetryScheduler* DatabasesCloner::_getListDatabasesScheduler() const
void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::string& name) {
UniqueLock lk(_mutex);
if (!status.isOK()) {
- warning() << "database '" << name << "' (" << (_currentClonerIndex + 1) << " of "
+ warning() << "database '" << name << "' (" << (_stats.databasesCloned + 1) << " of "
<< _databaseCloners.size() << ") clone failed due to " << status.toString();
_setStatus_inlock(status);
_failed_inlock(lk);
@@ -320,9 +349,9 @@ void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::stri
}
}
- _currentClonerIndex++;
+ _stats.databasesCloned++;
- if (_currentClonerIndex == _databaseCloners.size()) {
+ if (_stats.databasesCloned == _databaseCloners.size()) {
_active = false;
// All cloners are done, trigger event.
LOG(2) << "All database clones finished, calling _finishFn.";
@@ -332,11 +361,12 @@ void DatabasesCloner::_onEachDBCloneFinish(const Status& status, const std::stri
}
// Start next database cloner.
- auto&& dbCloner = _databaseCloners[_currentClonerIndex];
+ auto&& dbCloner = _databaseCloners[_stats.databasesCloned];
auto startStatus = dbCloner->startup();
if (!startStatus.isOK()) {
- warning() << "failed to schedule database '" << name << "' (" << (_currentClonerIndex + 1)
- << " of " << _databaseCloners.size() << ") due to " << startStatus.toString();
+ warning() << "failed to schedule database '" << name << "' ("
+ << (_stats.databasesCloned + 1) << " of " << _databaseCloners.size()
+ << ") due to " << startStatus.toString();
_setStatus_inlock(startStatus);
_failed_inlock(lk);
return;
diff --git a/src/mongo/db/repl/databases_cloner.h b/src/mongo/db/repl/databases_cloner.h
index 170ba2b7489..4bfe44b1c16 100644
--- a/src/mongo/db/repl/databases_cloner.h
+++ b/src/mongo/db/repl/databases_cloner.h
@@ -63,6 +63,15 @@ using UniqueLock = stdx::unique_lock<stdx::mutex>;
*/
class DatabasesCloner {
public:
+ struct Stats {
+ size_t databasesCloned = 0U;
+ std::vector<DatabaseCloner::Stats> databaseStats;
+
+ std::string toString() const;
+ BSONObj toBSON() const;
+ void append(BSONObjBuilder* builder) const;
+ };
+
using IncludeDbFilterFn = stdx::function<bool(const BSONObj& dbInfo)>;
using OnFinishFn = stdx::function<void(const Status&)>;
DatabasesCloner(StorageInterface* si,
@@ -78,6 +87,7 @@ public:
bool isActive();
void join();
void shutdown();
+ DatabasesCloner::Stats getStats() const;
/**
* Returns the status after completion. If multiple error occur, only one is recorded/returned.
@@ -133,10 +143,9 @@ private:
mutable stdx::mutex _mutex; // (S)
Status _status{ErrorCodes::NotYetInitialized, ""}; // (M) If it is not OK, we stop everything.
executor::TaskExecutor* _exec; // (R) executor to schedule things with
- OldThreadPool* _dbWorkThreadPool; // (R) db worker thread pool for collection cloning.
- HostAndPort _source; // (R) The source to use, until we get an error
- bool _active = false; // (M) false until we start
- std::size_t _currentClonerIndex = 0U; // (M) Index of currently active database cloner.
+ OldThreadPool* _dbWorkThreadPool; // (R) db worker thread pool for collection cloning.
+ HostAndPort _source; // (R) The source to use, until we get an error
+ bool _active = false; // (M) false until we start
CollectionCloner::ScheduleDbWorkFn _scheduleDbWorkFn; // (M)
const IncludeDbFilterFn _includeDbFn; // (R) function which decides which dbs are cloned.
@@ -145,6 +154,7 @@ private:
std::unique_ptr<RemoteCommandRetryScheduler> _listDBsScheduler; // (M) scheduler for listDBs.
std::vector<std::shared_ptr<DatabaseCloner>> _databaseCloners; // (M) database cloners by name
+ Stats _stats; // (M)
};
diff --git a/src/mongo/db/repl/initial_sync_state.h b/src/mongo/db/repl/initial_sync_state.h
index 5d014ece0f2..cb607824171 100644
--- a/src/mongo/db/repl/initial_sync_state.h
+++ b/src/mongo/db/repl/initial_sync_state.h
@@ -58,6 +58,7 @@ struct InitialSyncState {
Timestamp stopTimestamp; // Referred to as minvalid, or the place we can transition states.
Event finishEvent; // event fired on completion, either successful or not.
Status status; // final status, only valid after the finishEvent fires.
+ Timer timer; // Timer for timing how long each initial sync attempt takes.
size_t fetchedMissingDocs = 0;
size_t appliedOps = 0;
};
diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h
index af164c0d4d8..b0d699d075b 100644
--- a/src/mongo/db/repl/replication_coordinator.h
+++ b/src/mongo/db/repl/replication_coordinator.h
@@ -440,10 +440,14 @@ public:
virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand(
ReplSetUpdatePositionCommandStyle commandStyle) const = 0;
+ enum class ReplSetGetStatusResponseStyle { kBasic, kInitialSync };
+
/**
- * Handles an incoming replSetGetStatus command. Adds BSON to 'result'.
+ * Handles an incoming replSetGetStatus command. Adds BSON to 'result'. If kInitialSync is
+ * requested but initial sync is not running, kBasic will be used.
*/
- virtual Status processReplSetGetStatus(BSONObjBuilder* result) = 0;
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result,
+ ReplSetGetStatusResponseStyle responseStyle) = 0;
/**
* Does an initial sync of data, after dropping existing data.
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index f8d95b157be..393e85b8850 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2071,7 +2071,17 @@ StatusWith<BSONObj> ReplicationCoordinatorImpl::prepareReplSetUpdatePositionComm
return cmdBuilder.obj();
}
-Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* response) {
+Status ReplicationCoordinatorImpl::processReplSetGetStatus(
+ BSONObjBuilder* response, ReplSetGetStatusResponseStyle responseStyle) {
+
+ BSONObj initialSyncProgress;
+ if (responseStyle == ReplSetGetStatusResponseStyle::kInitialSync) {
+ LockGuard lk(_mutex);
+ if (_dr) {
+ initialSyncProgress = _dr->getInitialSyncProgress();
+ }
+ }
+
LockGuard topoLock(_topoMutex);
Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
@@ -2082,7 +2092,8 @@ Status ReplicationCoordinatorImpl::processReplSetGetStatus(BSONObjBuilder* respo
getMyLastAppliedOpTime(),
getMyLastDurableOpTime(),
getLastCommittedOpTime(),
- getCurrentCommittedSnapshotOpTime()},
+ getCurrentCommittedSnapshotOpTime(),
+ initialSyncProgress},
response,
&result);
return result;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 5087898b260..a2361cccbbb 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -212,7 +212,8 @@ public:
virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand(
ReplSetUpdatePositionCommandStyle commandStyle) const override;
- virtual Status processReplSetGetStatus(BSONObjBuilder* result) override;
+ virtual Status processReplSetGetStatus(BSONObjBuilder* result,
+ ReplSetGetStatusResponseStyle responseStyle) override;
virtual void fillIsMasterForReplSet(IsMasterResponse* result) override;
@@ -1322,7 +1323,7 @@ private:
// Storage interface used by data replicator.
StorageInterface* _storage; // (PS)
// Data Replicator used to replicate data
- std::unique_ptr<DataReplicator> _dr; // (S)
+ std::unique_ptr<DataReplicator> _dr; // (M)
// Hands out the next snapshot name.
AtomicUInt64 _snapshotNameGenerator; // (S)
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 2290786e343..f9d12233c3f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -447,7 +447,8 @@ TEST_F(ReplCoordHBV1Test, IgnoreTheContentsOfMetadataWhenItsReplicaSetIdDoesNotM
ASSERT_NOT_EQUALS(opTime.getTerm(), getTopoCoord().getTerm());
BSONObjBuilder statusBuilder;
- ASSERT_OK(getReplCoord()->processReplSetGetStatus(&statusBuilder));
+ ASSERT_OK(getReplCoord()->processReplSetGetStatus(
+ &statusBuilder, ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic));
auto statusObj = statusBuilder.obj();
unittest::log() << "replica set status = " << statusObj;
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index f1c884fc2f0..0d9b6747aad 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -249,7 +249,8 @@ void ReplicationCoordinatorMock::processReplSetMetadata(const rpc::ReplSetMetada
void ReplicationCoordinatorMock::cancelAndRescheduleElectionTimeout() {}
-Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder* result) {
+Status ReplicationCoordinatorMock::processReplSetGetStatus(BSONObjBuilder*,
+ ReplSetGetStatusResponseStyle) {
return Status::OK();
}
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index c079843d3e3..287ca6e8e57 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -143,7 +143,7 @@ public:
virtual StatusWith<BSONObj> prepareReplSetUpdatePositionCommand(
ReplSetUpdatePositionCommandStyle commandStyle) const override;
- virtual Status processReplSetGetStatus(BSONObjBuilder* result);
+ virtual Status processReplSetGetStatus(BSONObjBuilder*, ReplSetGetStatusResponseStyle);
virtual void fillIsMasterForReplSet(IsMasterResponse* result);
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 7011b1e9b9e..54a482fdd8e 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -201,7 +201,18 @@ public:
if (!status.isOK())
return appendCommandStatus(result, status);
- status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result);
+ bool includeInitialSync = false;
+ Status initialSyncStatus =
+ bsonExtractBooleanFieldWithDefault(cmdObj, "initialSync", false, &includeInitialSync);
+ if (!initialSyncStatus.isOK()) {
+ return appendCommandStatus(result, initialSyncStatus);
+ }
+
+ auto responseStyle = ReplicationCoordinator::ReplSetGetStatusResponseStyle::kBasic;
+ if (includeInitialSync) {
+ responseStyle = ReplicationCoordinator::ReplSetGetStatusResponseStyle::kInitialSync;
+ }
+ status = getGlobalReplicationCoordinator()->processReplSetGetStatus(&result, responseStyle);
return appendCommandStatus(result, status);
}
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 72c21024684..82427211e1b 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -249,6 +249,7 @@ public:
const OpTime& lastOpDurable;
const OpTime& lastCommittedOpTime;
const OpTime& readConcernMajorityOpTime;
+ const BSONObj& initialSyncStatus;
};
// produce a reply to a status request
diff --git a/src/mongo/db/repl/topology_coordinator_impl.cpp b/src/mongo/db/repl/topology_coordinator_impl.cpp
index d4f591e92f9..b5be182acbc 100644
--- a/src/mongo/db/repl/topology_coordinator_impl.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl.cpp
@@ -1513,6 +1513,7 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS
const Date_t now = rsStatusArgs.now;
const OpTime& lastOpApplied = rsStatusArgs.lastOpApplied;
const OpTime& lastOpDurable = rsStatusArgs.lastOpDurable;
+ const BSONObj& initialSyncStatus = rsStatusArgs.initialSyncStatus;
if (_selfIndex == -1) {
// We're REMOVED or have an invalid config
@@ -1665,6 +1666,9 @@ void TopologyCoordinatorImpl::prepareStatusResponse(const ReplSetStatusArgs& rsS
appendOpTime(&optimes, "durableOpTime", lastOpDurable, _rsConfig.getProtocolVersion());
response->append("optimes", optimes.obj());
+ if (!initialSyncStatus.isEmpty()) {
+ response->append("initialSyncStatus", initialSyncStatus);
+ }
response->append("members", membersOut);
*result = Status::OK();
diff --git a/src/mongo/db/repl/topology_coordinator_impl_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
index ada5a794c9c..ef019e242ea 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_test.cpp
@@ -1502,7 +1502,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
oplogProgress,
oplogDurable,
lastCommittedOpTime,
- readConcernMajorityOpTime},
+ readConcernMajorityOpTime,
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -1582,6 +1583,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
ASSERT_TRUE(selfStatus.hasField("optimeDate"));
ASSERT_EQUALS(Date_t::fromMillisSinceEpoch(oplogProgress.getSecs() * 1000ULL),
selfStatus["optimeDate"].Date());
+ ASSERT_FALSE(rsStatus.hasField("initialSyncStatus"));
ASSERT_EQUALS(2000, rsStatus["heartbeatIntervalMillis"].numberInt());
@@ -1617,7 +1619,8 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidReplicaSetConfigInResponseToGetStatusWhe
oplogProgress,
oplogProgress,
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_NOT_OK(resultStatus);
@@ -2381,7 +2384,8 @@ public:
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -2444,7 +2448,8 @@ public:
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -2766,7 +2771,8 @@ TEST_F(HeartbeatResponseTestTwoRetries, NodeDoesNotRetryHeartbeatsAfterFailingTw
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -3010,7 +3016,8 @@ TEST_F(HeartbeatResponseTestTwoRetries,
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
diff --git a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
index 4a25ff9d2fc..c8479a8735a 100644
--- a/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
+++ b/src/mongo/db/repl/topology_coordinator_impl_v1_test.cpp
@@ -1449,6 +1449,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
OpTime oplogDurable(Timestamp(3, 4), 1);
OpTime lastCommittedOpTime(Timestamp(2, 3), 6);
OpTime readConcernMajorityOpTime(Timestamp(4, 5), 7);
+ BSONObj initialSyncStatus = BSON("failedInitialSyncAttempts" << 1);
std::string setName = "mySet";
ReplSetHeartbeatResponse hb;
@@ -1503,7 +1504,8 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
oplogProgress,
oplogDurable,
lastCommittedOpTime,
- readConcernMajorityOpTime},
+ readConcernMajorityOpTime,
+ initialSyncStatus},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -1585,6 +1587,7 @@ TEST_F(TopoCoordTest, ReplSetGetStatus) {
selfStatus["optimeDate"].Date());
ASSERT_EQUALS(2000, rsStatus["heartbeatIntervalMillis"].numberInt());
+ ASSERT_EQUALS(initialSyncStatus, rsStatus["initialSyncStatus"].Obj());
// TODO(spencer): Test electionTime and pingMs are set properly
}
@@ -1618,7 +1621,8 @@ TEST_F(TopoCoordTest, NodeReturnsInvalidReplicaSetConfigInResponseToGetStatusWhe
oplogProgress,
oplogProgress,
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_NOT_OK(resultStatus);
@@ -4387,7 +4391,8 @@ public:
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -4470,7 +4475,8 @@ public:
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -4518,7 +4524,8 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, NodeDoesNotRetryHeartbeatsAfterFailing
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);
@@ -4578,7 +4585,8 @@ TEST_F(HeartbeatResponseTestTwoRetriesV1, HeartbeatThreeNonconsecutiveFailures)
OpTime(Timestamp(100, 0), 0),
OpTime(Timestamp(100, 0), 0),
OpTime(),
- OpTime()},
+ OpTime(),
+ BSONObj()},
&statusBuilder,
&resultStatus);
ASSERT_OK(resultStatus);