Diffstat (limited to 'src/mongo/db/repl')
 src/mongo/db/repl/check_quorum_for_config_change.cpp         |  9
 src/mongo/db/repl/repl_set_request_votes_args.cpp            | 12
 src/mongo/db/repl/repl_set_request_votes_args.h              |  2
 src/mongo/db/repl/replication_coordinator_impl.cpp           |  9
 src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp  |  3
 src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp | 46
 src/mongo/db/repl/scatter_gather_runner.cpp                  | 16
 src/mongo/db/repl/scatter_gather_runner.h                    |  9
 src/mongo/db/repl/scatter_gather_test.cpp                    | 22
 src/mongo/db/repl/topology_coordinator.cpp                   | 71
 src/mongo/db/repl/topology_coordinator.h                     | 10
 src/mongo/db/repl/vote_requester.cpp                         |  4
 12 files changed, 156 insertions(+), 57 deletions(-)
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.cpp b/src/mongo/db/repl/check_quorum_for_config_change.cpp
index 403f7f8303a..67204eed65c 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change.cpp
@@ -280,9 +280,10 @@ bool QuorumChecker::hasReceivedSufficientResponses() const {
Status checkQuorumGeneral(executor::TaskExecutor* executor,
const ReplSetConfig& rsConfig,
const int myIndex,
- long long term) {
+ long long term,
+ std::string logMessage) {
auto checker = std::make_shared<QuorumChecker>(&rsConfig, myIndex, term);
- ScatterGatherRunner runner(checker, executor);
+ ScatterGatherRunner runner(checker, executor, std::move(logMessage));
Status status = runner.run();
if (!status.isOK()) {
return status;
@@ -296,7 +297,7 @@ Status checkQuorumForInitiate(executor::TaskExecutor* executor,
const int myIndex,
long long term) {
invariant(rsConfig.getConfigVersion() == 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex, term);
+ return checkQuorumGeneral(executor, rsConfig, myIndex, term, "initiate quorum check");
}
Status checkQuorumForReconfig(executor::TaskExecutor* executor,
@@ -304,7 +305,7 @@ Status checkQuorumForReconfig(executor::TaskExecutor* executor,
const int myIndex,
long long term) {
invariant(rsConfig.getConfigVersion() > 1);
- return checkQuorumGeneral(executor, rsConfig, myIndex, term);
+ return checkQuorumGeneral(executor, rsConfig, myIndex, term, "reconfig quorum check");
}
} // namespace repl
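The new logMessage parameter is taken by value and handed off with std::move at each layer, so the string-literal call sites above cost one construction plus moves, never a copy per layer. A minimal standalone sketch of that pattern, using plain std:: types and hypothetical names rather than the MongoDB classes:

```cpp
#include <string>
#include <utility>

// Hypothetical stand-in for ScatterGatherRunner: owns the label it logs with.
struct Runner {
    explicit Runner(std::string logMessage) : _logMessage(std::move(logMessage)) {}
    std::string _logMessage;
};

// Stand-in for checkQuorumGeneral: accepts the label by value, moves it on.
Runner makeRunner(std::string logMessage) {
    return Runner(std::move(logMessage));
}

int main() {
    // A temporary like "initiate quorum check" is moved through, not copied.
    Runner r = makeRunner("initiate quorum check");
    return 0;
}
```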
diff --git a/src/mongo/db/repl/repl_set_request_votes_args.cpp b/src/mongo/db/repl/repl_set_request_votes_args.cpp
index 7f170973483..691f7602dbf 100644
--- a/src/mongo/db/repl/repl_set_request_votes_args.cpp
+++ b/src/mongo/db/repl/repl_set_request_votes_args.cpp
@@ -132,6 +132,12 @@ void ReplSetRequestVotesArgs::addToBSON(BSONObjBuilder* builder) const {
_lastDurableOpTime.append(builder, kLastDurableOpTimeFieldName);
}
+std::string ReplSetRequestVotesArgs::toString() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.done().toString();
+}
+
Status ReplSetRequestVotesResponse::initialize(const BSONObj& argsObj) {
auto status = bsonExtractIntegerField(argsObj, kTermFieldName, &_term);
if (!status.isOK())
@@ -184,5 +190,11 @@ BSONObj ReplSetRequestVotesResponse::toBSON() const {
return builder.obj();
}
+std::string ReplSetRequestVotesResponse::toString() const {
+ BSONObjBuilder builder;
+ addToBSON(&builder);
+ return builder.done().toString();
+}
+
} // namespace repl
} // namespace mongo
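Both new toString() methods reuse the existing addToBSON() serializer rather than formatting the fields a second time, so the logged text cannot drift from the wire format. A standalone sketch of that single-source-of-truth pattern, with a plain stream standing in for BSONObjBuilder and hypothetical field names:

```cpp
#include <iostream>
#include <sstream>
#include <string>

struct VoteRequest {
    std::string setName;
    long long term = -1;
    bool dryRun = false;

    // Analogue of addToBSON(): the one place the fields are serialized.
    void addToStream(std::ostream& os) const {
        os << "{ setName: \"" << setName << "\", term: " << term
           << ", dryRun: " << (dryRun ? "true" : "false") << " }";
    }

    // Analogue of the new toString(): delegates to the serializer above.
    std::string toString() const {
        std::ostringstream os;
        addToStream(os);
        return os.str();
    }
};

int main() {
    std::cout << VoteRequest{"rs0", 7, true}.toString() << '\n';
}
```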
diff --git a/src/mongo/db/repl/repl_set_request_votes_args.h b/src/mongo/db/repl/repl_set_request_votes_args.h
index 7f1cf3bc72e..cc8d3f9e270 100644
--- a/src/mongo/db/repl/repl_set_request_votes_args.h
+++ b/src/mongo/db/repl/repl_set_request_votes_args.h
@@ -50,6 +50,7 @@ public:
bool isADryRun() const;
void addToBSON(BSONObjBuilder* builder) const;
+ std::string toString() const;
private:
std::string _setName; // Name of the replset.
@@ -75,6 +76,7 @@ public:
void addToBSON(BSONObjBuilder* builder) const;
BSONObj toBSON() const;
+ std::string toString() const;
private:
long long _term = -1;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 7fa82e95e8b..51b7313efa9 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -27,6 +27,8 @@
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+#define LOG_FOR_ELECTION(level) \
+ MONGO_LOG_COMPONENT(level, ::mongo::logger::LogComponent::kReplicationElection)
#include "mongo/platform/basic.h"
@@ -2703,6 +2705,13 @@ void ReplicationCoordinatorImpl::CatchupState::signalHeartbeatUpdate_inlock() {
}
log() << "Heartbeats updated catchup target optime to " << *targetOpTime;
+ log() << "Latest known optime per replica set member:";
+ auto opTimesPerMember = _repl->_topCoord->latestKnownOpTimeSinceHeartbeatRestartPerMember();
+ for (auto&& pair : opTimesPerMember) {
+ log() << "Member ID: " << pair.first
+ << ", latest known optime: " << (pair.second ? (*pair.second).toString() : "unknown");
+ }
+
if (_waiter) {
_repl->_opTimeWaiterList.remove_inlock(_waiter.get());
}
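The loop above prints one line per member and substitutes "unknown" when no optime is known. A standalone sketch of consuming such a per-member map, with std::optional standing in for boost::optional and strings standing in for OpTime (values hypothetical):

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>

int main() {
    // memberId -> latest known optime; none if the member is down or has not
    // responded to a heartbeat since the last restart.
    std::map<int, std::optional<std::string>> opTimesPerMember{
        {0, std::string{"{ ts: Timestamp(1, 5), t: 2 }"}},
        {1, std::nullopt}};

    std::cout << "Latest known optime per replica set member:\n";
    for (const auto& [memberId, opTime] : opTimesPerMember) {
        std::cout << "Member ID: " << memberId << ", latest known optime: "
                  << opTime.value_or("unknown") << '\n';
    }
}
```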
diff --git a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
index cae89b0fe2a..f9d030458f2 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_elect_v1.cpp
@@ -26,7 +26,7 @@
* it in the license file.
*/
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplicationElection
#include "mongo/platform/basic.h"
@@ -53,6 +53,7 @@ public:
if (_dismissed) {
return;
}
+ log() << "Lost " << (_isDryRun ? "dry run " : "") << "election due to internal error";
_replCoord->_topCoord->processLoseElection();
_replCoord->_voteRequester.reset(nullptr);
if (_isDryRun && _replCoord->_electionDryRunFinishedEvent.isValid()) {
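The new log line sits in a scope-guard destructor: unless dismiss() is called on the success path, destroying the guard logs the loss and runs the cleanup, which covers every early return in the election code. A standalone sketch of that RAII shape (hypothetical, with std::cout in place of the log stream):

```cpp
#include <iostream>

class LoseElectionGuard {
public:
    explicit LoseElectionGuard(bool isDryRun) : _isDryRun(isDryRun) {}

    ~LoseElectionGuard() {
        if (_dismissed) {
            return;  // success path: the election code dismissed the guard
        }
        std::cout << "Lost " << (_isDryRun ? "dry run " : "")
                  << "election due to internal error\n";
        // The real guard also tells the topology coordinator about the loss
        // and signals any dry-run-finished event here.
    }

    void dismiss() {
        _dismissed = true;
    }

private:
    const bool _isDryRun;
    bool _dismissed = false;
};

int main() {
    LoseElectionGuard guard(true);
    // No dismiss(): the destructor fires and logs the dry-run loss.
}
```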
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index a4bb3eae1ef..479881621eb 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -27,6 +27,8 @@
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+#define LOG_FOR_ELECTION(level) \
+ MONGO_LOG_COMPONENT(level, ::mongo::logger::LogComponent::kReplicationElection)
#define LOG_FOR_HEARTBEATS(level) \
MONGO_LOG_COMPONENT(level, ::mongo::logger::LogComponent::kReplicationHeartbeats)
@@ -192,6 +194,7 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
// Postpone election timeout if we have a successful heartbeat response from the primary.
if (hbResponse.hasState() && hbResponse.getState().primary() &&
hbResponse.getTerm() == _topCoord->getTerm()) {
+ LOG_FOR_ELECTION(4) << "Postponing election timeout due to heartbeat from primary";
_cancelAndRescheduleElectionTimeout_inlock();
}
} else {
@@ -270,7 +273,7 @@ stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatRespo
Milliseconds priorityTakeoverDelay = _rsConfig.getPriorityTakeoverDelay(_selfIndex);
Milliseconds randomOffset = _getRandomizedElectionOffset_inlock();
_priorityTakeoverWhen = _replExecutor->now() + priorityTakeoverDelay + randomOffset;
- log() << "Scheduling priority takeover at " << _priorityTakeoverWhen;
+ LOG_FOR_ELECTION(0) << "Scheduling priority takeover at " << _priorityTakeoverWhen;
_priorityTakeoverCbh = _scheduleWorkAt(
_priorityTakeoverWhen, [=](const mongo::executor::TaskExecutor::CallbackArgs&) {
_startElectSelfIfEligibleV1(
@@ -284,7 +287,7 @@ stdx::unique_lock<stdx::mutex> ReplicationCoordinatorImpl::_handleHeartbeatRespo
if (!_catchupTakeoverCbh.isValid() && !_priorityTakeoverCbh.isValid()) {
Milliseconds catchupTakeoverDelay = _rsConfig.getCatchUpTakeoverDelay();
_catchupTakeoverWhen = _replExecutor->now() + catchupTakeoverDelay;
- log() << "Scheduling catchup takeover at " << _catchupTakeoverWhen;
+ LOG_FOR_ELECTION(0) << "Scheduling catchup takeover at " << _catchupTakeoverWhen;
_catchupTakeoverCbh = _scheduleWorkAt(
_catchupTakeoverWhen, [=](const mongo::executor::TaskExecutor::CallbackArgs&) {
_startElectSelfIfEligibleV1(
@@ -779,7 +782,7 @@ void ReplicationCoordinatorImpl::_cancelAndRescheduleElectionTimeout_inlock() {
auto now = _replExecutor->now();
auto when = now + _rsConfig.getElectionTimeoutPeriod() + randomOffset;
invariant(when > now);
- LOG(4) << "Scheduling election timeout callback at " << when;
+ LOG_FOR_ELECTION(4) << "Scheduling election timeout callback at " << when;
_handleElectionTimeoutWhen = when;
_handleElectionTimeoutCbh =
_scheduleWorkAt(when, [=](const mongo::executor::TaskExecutor::CallbackArgs&) {
@@ -803,7 +806,7 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
_cancelPriorityTakeover_inlock();
_cancelAndRescheduleElectionTimeout_inlock();
if (_inShutdown) {
- log() << "Not starting an election, since we are shutting down";
+ LOG_FOR_ELECTION(0) << "Not starting an election, since we are shutting down";
return;
}
}
@@ -812,24 +815,26 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
if (!status.isOK()) {
switch (reason) {
case TopologyCoordinator::StartElectionReason::kElectionTimeout:
- log() << "Not starting an election, since we are not electable due to: "
- << status.reason();
+ LOG_FOR_ELECTION(0)
+ << "Not starting an election, since we are not electable due to: "
+ << status.reason();
break;
case TopologyCoordinator::StartElectionReason::kPriorityTakeover:
- log() << "Not starting an election for a priority takeover, "
- << "since we are not electable due to: " << status.reason();
+ LOG_FOR_ELECTION(0) << "Not starting an election for a priority takeover, "
+ << "since we are not electable due to: " << status.reason();
break;
case TopologyCoordinator::StartElectionReason::kStepUpRequest:
- log() << "Not starting an election for a replSetStepUp request, "
- << "since we are not electable due to: " << status.reason();
+ LOG_FOR_ELECTION(0) << "Not starting an election for a replSetStepUp request, "
+ << "since we are not electable due to: " << status.reason();
break;
case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
- log() << "Not starting an election for a catchup takeover, "
- << "since we are not electable due to: " << status.reason();
+ LOG_FOR_ELECTION(0) << "Not starting an election for a catchup takeover, "
+ << "since we are not electable due to: " << status.reason();
break;
case TopologyCoordinator::StartElectionReason::kSingleNodePromptElection:
- log() << "Not starting an election for a single node replica set prompt election, "
- << "since we are not electable due to: " << status.reason();
+ LOG_FOR_ELECTION(0)
+ << "Not starting an election for a single node replica set prompt election, "
+ << "since we are not electable due to: " << status.reason();
break;
}
return;
@@ -837,20 +842,21 @@ void ReplicationCoordinatorImpl::_startElectSelfIfEligibleV1(
switch (reason) {
case TopologyCoordinator::StartElectionReason::kElectionTimeout:
- log() << "Starting an election, since we've seen no PRIMARY in the past "
- << _rsConfig.getElectionTimeoutPeriod();
+ LOG_FOR_ELECTION(0) << "Starting an election, since we've seen no PRIMARY in the past "
+ << _rsConfig.getElectionTimeoutPeriod();
break;
case TopologyCoordinator::StartElectionReason::kPriorityTakeover:
- log() << "Starting an election for a priority takeover";
+ LOG_FOR_ELECTION(0) << "Starting an election for a priority takeover";
break;
case TopologyCoordinator::StartElectionReason::kStepUpRequest:
- log() << "Starting an election due to step up request";
+ LOG_FOR_ELECTION(0) << "Starting an election due to step up request";
break;
case TopologyCoordinator::StartElectionReason::kCatchupTakeover:
- log() << "Starting an election for a catchup takeover";
+ LOG_FOR_ELECTION(0) << "Starting an election for a catchup takeover";
break;
case TopologyCoordinator::StartElectionReason::kSingleNodePromptElection:
- log() << "Starting an election due to single node replica set prompt election";
+ LOG_FOR_ELECTION(0)
+ << "Starting an election due to single node replica set prompt election";
break;
}
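LOG_FOR_ELECTION(level) differs from the plain log()/LOG(level) calls it replaces only in the component it tags, so election lines can be filtered or given their own verbosity independently of the file's default kReplication component. A standalone sketch of a component-and-level gated stream macro (hypothetical; the real MONGO_LOG_COMPONENT machinery is more involved):

```cpp
#include <iostream>
#include <sstream>

enum class Component { kReplication, kReplicationElection };

// Buffers a line and emits it at destruction iff the level passes the gate.
struct LogStream {
    LogStream(int level, Component) : _enabled(level <= kVerbosity) {}
    ~LogStream() {
        if (_enabled)
            std::cout << _os.str() << '\n';
    }
    template <typename T>
    LogStream& operator<<(const T& v) {
        _os << v;
        return *this;
    }
    static constexpr int kVerbosity = 0;  // pretend severity setting
    std::ostringstream _os;
    bool _enabled;
};

#define LOG_FOR_ELECTION(level) LogStream(level, Component::kReplicationElection)

int main() {
    LOG_FOR_ELECTION(0) << "Starting an election for a priority takeover";
    LOG_FOR_ELECTION(4) << "suppressed unless election verbosity is raised";
}
```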
diff --git a/src/mongo/db/repl/scatter_gather_runner.cpp b/src/mongo/db/repl/scatter_gather_runner.cpp
index 51430847968..bcb92216794 100644
--- a/src/mongo/db/repl/scatter_gather_runner.cpp
+++ b/src/mongo/db/repl/scatter_gather_runner.cpp
@@ -26,6 +26,8 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+
#include "mongo/platform/basic.h"
#include "mongo/db/repl/scatter_gather_runner.h"
@@ -36,6 +38,7 @@
#include "mongo/db/repl/scatter_gather_algorithm.h"
#include "mongo/stdx/functional.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"
namespace mongo {
@@ -49,8 +52,10 @@ using RemoteCommandCallbackArgs = executor::TaskExecutor::RemoteCommandCallbackA
using RemoteCommandCallbackFn = executor::TaskExecutor::RemoteCommandCallbackFn;
ScatterGatherRunner::ScatterGatherRunner(std::shared_ptr<ScatterGatherAlgorithm> algorithm,
- executor::TaskExecutor* executor)
- : _executor(executor), _impl(std::make_shared<RunnerImpl>(std::move(algorithm), executor)) {}
+ executor::TaskExecutor* executor,
+ std::string logMessage)
+ : _executor(executor),
+ _impl(std::make_shared<RunnerImpl>(std::move(algorithm), executor, std::move(logMessage))) {}
Status ScatterGatherRunner::run() {
auto finishEvh = start();
@@ -80,8 +85,9 @@ void ScatterGatherRunner::cancel() {
* Scatter gather runner implementation.
*/
ScatterGatherRunner::RunnerImpl::RunnerImpl(std::shared_ptr<ScatterGatherAlgorithm> algorithm,
- executor::TaskExecutor* executor)
- : _executor(executor), _algorithm(std::move(algorithm)) {}
+ executor::TaskExecutor* executor,
+ std::string logMessage)
+ : _executor(executor), _algorithm(std::move(algorithm)), _logMessage(std::move(logMessage)) {}
StatusWith<EventHandle> ScatterGatherRunner::RunnerImpl::start(
const RemoteCommandCallbackFn processResponseCB) {
@@ -98,6 +104,8 @@ StatusWith<EventHandle> ScatterGatherRunner::RunnerImpl::start(
std::vector<RemoteCommandRequest> requests = _algorithm->getRequests();
for (size_t i = 0; i < requests.size(); ++i) {
+ log() << "Scheduling remote command request for " << _logMessage << ": "
+ << requests[i].toString();
const StatusWith<CallbackHandle> cbh =
_executor->scheduleRemoteCommand(requests[i], processResponseCB);
if (cbh.getStatus() == ErrorCodes::ShutdownInProgress) {
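With the label stored on RunnerImpl, every request scheduled by the scatter-gather loop gets a line naming both the purpose and the target. A standalone sketch of that loop (hypothetical names, strings in place of RemoteCommandRequest):

```cpp
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class RunnerImpl {
public:
    RunnerImpl(std::vector<std::string> requests, std::string logMessage)
        : _requests(std::move(requests)), _logMessage(std::move(logMessage)) {}

    void start() const {
        for (const auto& request : _requests) {
            // Mirrors the log line added above.
            std::cout << "Scheduling remote command request for " << _logMessage
                      << ": " << request << '\n';
        }
    }

private:
    std::vector<std::string> _requests;
    std::string _logMessage;
};

int main() {
    RunnerImpl{{"node1:27017", "node2:27017"}, "vote request"}.start();
}
```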
diff --git a/src/mongo/db/repl/scatter_gather_runner.h b/src/mongo/db/repl/scatter_gather_runner.h
index ce12c99b1a8..8f98e673e72 100644
--- a/src/mongo/db/repl/scatter_gather_runner.h
+++ b/src/mongo/db/repl/scatter_gather_runner.h
@@ -56,9 +56,12 @@ public:
*
* "executor" must remain in scope until the runner's destructor completes.
* "algorithm" is shared between the runner and the caller.
+ * "logMessage" is the process for which this ScatterGatherRunner is used. It will be included
+ * in log lines written by the ScatterGatherRunner for remote command requests.
*/
explicit ScatterGatherRunner(std::shared_ptr<ScatterGatherAlgorithm> algorithm,
- executor::TaskExecutor* executor);
+ executor::TaskExecutor* executor,
+ std::string logMessage);
/**
* Runs the scatter-gather process and blocks until it completes.
@@ -94,7 +97,8 @@ private:
class RunnerImpl {
public:
explicit RunnerImpl(std::shared_ptr<ScatterGatherAlgorithm> algorithm,
- executor::TaskExecutor* executor);
+ executor::TaskExecutor* executor,
+ std::string logMessage);
/**
* On success, returns an event handle that will be signaled when the runner has
@@ -125,6 +129,7 @@ private:
executor::TaskExecutor* _executor; // Not owned here.
std::shared_ptr<ScatterGatherAlgorithm> _algorithm;
+ std::string _logMessage;
executor::TaskExecutor::EventHandle _sufficientResponsesReceived;
std::vector<executor::TaskExecutor::CallbackHandle> _callbacks;
bool _started = false;
diff --git a/src/mongo/db/repl/scatter_gather_test.cpp b/src/mongo/db/repl/scatter_gather_test.cpp
index 6b7896fd2bd..1d5a32b4895 100644
--- a/src/mongo/db/repl/scatter_gather_test.cpp
+++ b/src/mongo/db/repl/scatter_gather_test.cpp
@@ -153,7 +153,7 @@ executor::TaskExecutor::CallbackFn getOnCompletionTestFunction(bool* ran) {
// completed.
TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor());
+ ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr->start();
ASSERT_OK(getExecutor()
@@ -195,7 +195,7 @@ TEST_F(ScatterGatherTest, DeleteAlgorithmAfterItHasCompleted) {
TEST_F(ScatterGatherTest, DeleteAlgorithmBeforeItCompletes) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor());
+ ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr->start();
ASSERT_OK(status.getStatus());
@@ -236,7 +236,7 @@ TEST_F(ScatterGatherTest, DeleteAlgorithmBeforeItCompletes) {
TEST_F(ScatterGatherTest, DeleteAlgorithmAfterCancel) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor());
+ ScatterGatherRunner* sgr = new ScatterGatherRunner(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr->start();
ASSERT_OK(status.getStatus());
@@ -274,7 +274,7 @@ TEST_F(ScatterGatherTest, DeleteAlgorithmAfterCancel) {
// to return ErrorCodes::ShutdownInProgress.
TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
shutdownExecutorThread();
sga->finish();
Status status = sgr.run();
@@ -285,7 +285,7 @@ TEST_F(ScatterGatherTest, ShutdownExecutorBeforeRun) {
// finishes will cause run() to return Status::OK().
TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
ScatterGatherRunnerRunner sgrr(&sgr, &getExecutor());
sgrr.run();
// need to wait for the scatter-gather to be scheduled in the executor
@@ -307,7 +307,7 @@ TEST_F(ScatterGatherTest, ShutdownExecutorAfterRun) {
// to return ErrorCodes::ShutdownInProgress and should not run onCompletion().
TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
shutdownExecutorThread();
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr.start();
@@ -320,7 +320,7 @@ TEST_F(ScatterGatherTest, ShutdownExecutorBeforeStart) {
// to return Status::OK and should not run onCompletion().
TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr.start();
ASSERT_OK(getExecutor()
@@ -335,7 +335,7 @@ TEST_F(ScatterGatherTest, ShutdownExecutorAfterStart) {
// Confirm that responses are not processed once sufficient responses have been received.
TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr.start();
ASSERT_OK(getExecutor()
@@ -378,7 +378,7 @@ TEST_F(ScatterGatherTest, DoNotProcessMoreThanSufficientResponses) {
// and that the algorithm processes the response correctly.
TEST_F(ScatterGatherTest, AlgorithmProcessesCallbackCanceledResponse) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr.start();
ASSERT_OK(getExecutor()
@@ -422,7 +422,7 @@ TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTru
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
// set hasReceivedSufficientResponses to return true before the run starts
sga->finish();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
bool ranCompletion = false;
StatusWith<executor::TaskExecutor::EventHandle> status = sgr.start();
ASSERT_OK(getExecutor()
@@ -485,7 +485,7 @@ TEST_F(ScatterGatherTest, DoNotCreateCallbacksIfHasSufficientResponsesReturnsTru
// Confirm that running via run() will finish once sufficient responses have been received.
TEST_F(ScatterGatherTest, SuccessfulScatterGatherViaRun) {
auto sga = std::make_shared<ScatterGatherTestAlgorithm>();
- ScatterGatherRunner sgr(sga, &getExecutor());
+ ScatterGatherRunner sgr(sga, &getExecutor(), "test");
ScatterGatherRunnerRunner sgrr(&sgr, &getExecutor());
sgrr.run();
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 85f6c4be236..03a7df790ce 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -27,6 +27,8 @@
*/
#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+#define LOG_FOR_ELECTION(level) \
+ MONGO_LOG_COMPONENT(level, ::mongo::logger::LogComponent::kReplicationElection)
#include "mongo/platform/basic.h"
@@ -1096,21 +1098,24 @@ HeartbeatResponseAction TopologyCoordinator::_updatePrimaryFromHBDataV1(
if (!catchupTakeoverDisabled && (_memberData.at(primaryIndex).getLastAppliedOpTime() <
_memberData.at(_selfIndex).getLastAppliedOpTime())) {
- LOG(2) << "I can take over the primary due to fresher data."
- << " Current primary index: " << primaryIndex << " in term "
- << _memberData.at(primaryIndex).getTerm() << "."
- << " Current primary optime: "
- << _memberData.at(primaryIndex).getLastAppliedOpTime()
- << " My optime: " << _memberData.at(_selfIndex).getLastAppliedOpTime();
+ LOG_FOR_ELECTION(2) << "I can take over the primary due to fresher data."
+ << " Current primary index: " << primaryIndex << " in term "
+ << _memberData.at(primaryIndex).getTerm() << "."
+ << " Current primary optime: "
+ << _memberData.at(primaryIndex).getLastAppliedOpTime()
+ << " My optime: "
+ << _memberData.at(_selfIndex).getLastAppliedOpTime();
+ LOG_FOR_ELECTION(4) << _getReplSetStatusString();
scheduleCatchupTakeover = true;
}
if (_rsConfig.getMemberAt(primaryIndex).getPriority() <
_rsConfig.getMemberAt(_selfIndex).getPriority()) {
- LOG(2) << "I can take over the primary due to higher priority."
- << " Current primary index: " << primaryIndex << " in term "
- << _memberData.at(primaryIndex).getTerm();
+ LOG_FOR_ELECTION(2) << "I can take over the primary due to higher priority."
+ << " Current primary index: " << primaryIndex << " in term "
+ << _memberData.at(primaryIndex).getTerm();
+ LOG_FOR_ELECTION(4) << _getReplSetStatusString();
schedulePriorityTakeover = true;
}
@@ -1124,10 +1129,11 @@ HeartbeatResponseAction TopologyCoordinator::_updatePrimaryFromHBDataV1(
// Otherwise, prefer to schedule a catchup takeover over a priority takeover
if (scheduleCatchupTakeover && schedulePriorityTakeover &&
_rsConfig.calculatePriorityRank(currentNodePriority) == 0) {
- LOG(2) << "I can take over the primary because I have a higher priority, the highest "
- << "priority in the replica set, and fresher data."
- << " Current primary index: " << primaryIndex << " in term "
- << _memberData.at(primaryIndex).getTerm();
+ LOG_FOR_ELECTION(2)
+ << "I can take over the primary because I have a higher priority, the highest "
+ << "priority in the replica set, and fresher data."
+ << " Current primary index: " << primaryIndex << " in term "
+ << _memberData.at(primaryIndex).getTerm();
return HeartbeatResponseAction::makePriorityTakeoverAction();
}
if (scheduleCatchupTakeover) {
@@ -1368,6 +1374,19 @@ const MemberConfig* TopologyCoordinator::_currentPrimaryMember() const {
return &(_rsConfig.getMemberAt(_currentPrimaryIndex));
}
+std::string TopologyCoordinator::_getReplSetStatusString() {
+ // Construct a ReplSetStatusArgs using default parameters. Missing parameters will not be
+ // included in the status string.
+ ReplSetStatusArgs rsStatusArgs{Date_t::now(), 0U, OpTime(), BSONObj(), boost::none};
+ BSONObjBuilder builder;
+ Status result(ErrorCodes::InternalError, "didn't set status in prepareStatusResponse");
+ prepareStatusResponse(rsStatusArgs, &builder, &result);
+ if (!result.isOK()) {
+ return str::stream() << "Could not get replSetGetStatus output: " << result.toString();
+ }
+ return str::stream() << "Current replSetGetStatus output: " << builder.done().toString();
+}
+
void TopologyCoordinator::prepareStatusResponse(const ReplSetStatusArgs& rsStatusArgs,
BSONObjBuilder* response,
Status* result) {
@@ -2652,6 +2671,10 @@ void TopologyCoordinator::processReplSetRequestVotes(const ReplSetRequestVotesAr
response->setVoteGranted(true);
}
}
+
+ LOG_FOR_ELECTION(0) << "Received vote request: " << args.toString();
+ LOG_FOR_ELECTION(0) << "Sending vote response: " << response->toString();
+ LOG_FOR_ELECTION(4) << _getReplSetStatusString();
}
void TopologyCoordinator::loadLastVote(const LastVote& lastVote) {
@@ -2725,5 +2748,27 @@ boost::optional<OpTime> TopologyCoordinator::latestKnownOpTimeSinceHeartbeatRest
return latest;
}
+std::map<int, boost::optional<OpTime>>
+TopologyCoordinator::latestKnownOpTimeSinceHeartbeatRestartPerMember() const {
+ std::map<int, boost::optional<OpTime>> opTimesPerMember;
+ for (size_t i = 0; i < _memberData.size(); i++) {
+ auto& member = _memberData[i];
+ int memberId = _rsConfig.getMemberAt(i).getId();
+
+ if (!member.isUpdatedSinceRestart()) {
+ opTimesPerMember[memberId] = boost::none;
+ continue;
+ }
+
+ if (!member.up()) {
+ opTimesPerMember[memberId] = boost::none;
+ continue;
+ }
+
+ opTimesPerMember[memberId] = member.getHeartbeatAppliedOpTime();
+ }
+ return opTimesPerMember;
+}
+
} // namespace repl
} // namespace mongo
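_getReplSetStatusString() seeds the out-parameter Status with a sentinel error so that a prepareStatusResponse() which returns without setting it is still reported, then folds either outcome into a single loggable string. A standalone sketch of that out-parameter convention (hypothetical Status type and a trivial callee):

```cpp
#include <iostream>
#include <sstream>
#include <string>

struct Status {
    bool ok;
    std::string reason;
};

// Stand-in for prepareStatusResponse(): fills the builder, sets the result.
void prepareStatusResponse(std::ostringstream& response, Status* result) {
    response << "{ set: \"rs0\", myState: 1 }";
    *result = Status{true, ""};
}

std::string getReplSetStatusString() {
    std::ostringstream builder;
    // Sentinel: if the callee forgets to set the result, this shows in logs.
    Status result{false, "didn't set status in prepareStatusResponse"};
    prepareStatusResponse(builder, &result);
    if (!result.ok) {
        return "Could not get replSetGetStatus output: " + result.reason;
    }
    return "Current replSetGetStatus output: " + builder.str();
}

int main() {
    std::cout << getReplSetStatusString() << '\n';
}
```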
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index 361aa534ff2..184ba8ede52 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -691,6 +691,13 @@ public:
*/
boost::optional<OpTime> latestKnownOpTimeSinceHeartbeatRestart() const;
+ /**
+ * Similar to latestKnownOpTimeSinceHeartbeatRestart(), but returns the latest known optime for
+ * each member in the config. If the member is not up or hasn't responded to a heartbeat since
+ * we last restarted, then its value will be boost::none.
+ */
+ std::map<int, boost::optional<OpTime>> latestKnownOpTimeSinceHeartbeatRestartPerMember() const;
+
////////////////////////////////////////////////////////////
//
// Test support methods
@@ -845,6 +852,9 @@ private:
*/
bool _isElectableNodeInSingleNodeReplicaSet() const;
+ // Returns a string representation of the current replica set status for logging purposes.
+ std::string _getReplSetStatusString();
+
// This node's role in the replication protocol.
Role _role;
diff --git a/src/mongo/db/repl/vote_requester.cpp b/src/mongo/db/repl/vote_requester.cpp
index aed7cca6049..b22afc59068 100644
--- a/src/mongo/db/repl/vote_requester.cpp
+++ b/src/mongo/db/repl/vote_requester.cpp
@@ -26,7 +26,7 @@
* it in the license file.
*/
-#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplication
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kReplicationElection
#include "mongo/platform/basic.h"
@@ -188,7 +188,7 @@ StatusWith<executor::TaskExecutor::EventHandle> VoteRequester::start(
int primaryIndex) {
_algorithm = std::make_shared<Algorithm>(
rsConfig, candidateIndex, term, dryRun, lastDurableOpTime, primaryIndex);
- _runner = stdx::make_unique<ScatterGatherRunner>(_algorithm, executor);
+ _runner = stdx::make_unique<ScatterGatherRunner>(_algorithm, executor, "vote request");
return _runner->start();
}